import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from rl.agents.dqn import DQNAgent
from rl.agents.ddpg import DDPGAgent
from rl.policy import BoltzmannGumbelQPolicy , LinearAnnealedPolicy , EpsGreedyQPolicy
from rl.memory import SequentialMemory
# Create the MsPacman Atari environment and record the size of its
# discrete action space (9 actions for MsPacman-v0).
# The bare `nb_actions` expression and the pasted cell output `9` from the
# original notebook were no-op residue in a script and have been removed.
env = gym.make('MsPacman-v0')
nb_actions = env.action_space.n  # == 9 for MsPacman-v0
# The neural network built below is the agent's representation of the environment.
# Next, we build the neural network that approximates the Q-function: it
# flattens a (window=1,) + observation_shape frame and outputs one value
# per action.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(3, activation= 'tanh'))    # layer 1: 3 cells with tanh activation function
model.add(Dense(nb_actions))               # projection back to action-space width
model.add(Dense(6, activation= 'sigmoid')) # layer 2: 6 cells with sigmoid activation function
model.add(Dense(nb_actions))
model.add(Dense(6, activation= 'sigmoid')) # layer 3: 6 cells with sigmoid activation function
model.add(Dense(nb_actions))
model.add(Dense(6, activation= 'sigmoid')) # layer 4: 6 cells with sigmoid activation function
model.add(Dense(nb_actions))
model.add(Dense(3, activation= 'tanh'))    # layer 5: 3 cells with tanh activation function
model.add(Dense(nb_actions))
model.add(Dense(3, activation= 'sigmoid')) # layer 6: 3 cells with sigmoid activation function
                                           # (original comment said 6 cells, which did not match the code)
model.add(Dense(nb_actions))
model.add(Activation('softmax'))           # softmax over the nb_actions outputs
                                           # (original comment claimed one sigmoid unit)
# NOTE(review): interleaving a Dense(nb_actions) layer after every hidden
# layer is an unusual DQN head; confirm the architecture is intentional.
# summary() prints the table itself and returns None, so the original
# print(model.summary()) emitted a spurious trailing "None" line.
model.summary()
_________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_1 (Flatten) (None, 100800) 0 _________________________________________________________________ dense_1 (Dense) (None, 3) 302403 _________________________________________________________________ dense_2 (Dense) (None, 9) 36 _________________________________________________________________ dense_3 (Dense) (None, 6) 60 _________________________________________________________________ dense_4 (Dense) (None, 9) 63 _________________________________________________________________ dense_5 (Dense) (None, 6) 60 _________________________________________________________________ dense_6 (Dense) (None, 9) 63 _________________________________________________________________ dense_7 (Dense) (None, 6) 60 _________________________________________________________________ dense_8 (Dense) (None, 9) 63 _________________________________________________________________ dense_9 (Dense) (None, 3) 30 _________________________________________________________________ dense_10 (Dense) (None, 9) 36 _________________________________________________________________ dense_11 (Dense) (None, 3) 30 _________________________________________________________________ dense_12 (Dense) (None, 9) 36 _________________________________________________________________ activation_1 (Activation) (None, 9) 0 ================================================================= Total params: 302,940 Trainable params: 302,940 Non-trainable params: 0 _________________________________________________________________ None
#Configure and compile the agent & policy.
# Replay buffer holding up to 100,000 transitions; window_length=1 matches
# the (1,) + observation_shape input of the model above.
memory = SequentialMemory(limit=100000, window_length=1)
# Boltzmann-Gumbel exploration policy (self-tuning; no annealing schedule).
policy = BoltzmannGumbelQPolicy()
# NOTE(review): nb_steps_warmup=50 is very small for a 100k-step run; the
# "Not enough entries to sample without replacement" warning quoted further
# down suggests it may need increasing — confirm.
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
target_model_update=1e-2, policy=policy)
# Compile with Adam (learning rate 1e-3) and track MAE/accuracy metrics.
dqn.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])
## Visualize the training during 100,000 steps (nb_steps below).
## Each episode is a game in which Pacman has two lives.
### When nb_steps_warmup is not long enough, a warning might appear:
### "Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!"
dqn.fit(env, nb_steps=100000, visualize=True, verbose=2)
Training for 100000 steps ... 922/100000: episode: 1, duration: 29.546s, episode steps: 922, steps per second: 31, episode reward: 370.000, mean reward: 0.401 [0.000, 10.000], mean action: 3.359 [0.000, 8.000], mean observation: 72.595 [0.000, 228.000], loss: 2.750501, mean_absolute_error: 0.159018, acc: 0.409802, mean_q: 0.655422 1355/100000: episode: 2, duration: 12.858s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.009 [1.000, 6.000], mean observation: 72.910 [0.000, 228.000], loss: 1.932692, mean_absolute_error: 0.100334, acc: 0.736865, mean_q: 0.939665 1794/100000: episode: 3, duration: 12.995s, episode steps: 439, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.030 [0.000, 7.000], mean observation: 72.906 [0.000, 228.000], loss: 1.599364, mean_absolute_error: 0.077912, acc: 0.806948, mean_q: 0.964618 2230/100000: episode: 4, duration: 13.087s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.018 [0.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 1.493305, mean_absolute_error: 0.067161, acc: 0.844252, mean_q: 0.977191 2695/100000: episode: 5, duration: 19.184s, episode steps: 465, steps per second: 24, episode reward: 110.000, mean reward: 0.237 [0.000, 10.000], mean action: 3.015 [0.000, 8.000], mean observation: 72.873 [0.000, 228.000], loss: 1.364146, mean_absolute_error: 0.060264, acc: 0.862769, mean_q: 0.984469 3244/100000: episode: 6, duration: 17.063s, episode steps: 549, steps per second: 32, episode reward: 60.000, mean reward: 0.109 [0.000, 10.000], mean action: 3.002 [0.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 1.354492, mean_absolute_error: 0.054776, acc: 0.885986, mean_q: 0.989604 3764/100000: episode: 7, duration: 16.936s, episode steps: 520, steps per second: 31, episode reward: 110.000, mean reward: 0.212 [0.000, 10.000], 
mean action: 3.033 [2.000, 8.000], mean observation: 72.857 [0.000, 228.000], loss: 1.285808, mean_absolute_error: 0.049860, acc: 0.902103, mean_q: 0.992924 4286/100000: episode: 8, duration: 16.436s, episode steps: 522, steps per second: 32, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.021 [0.000, 8.000], mean observation: 72.854 [0.000, 228.000], loss: 1.222823, mean_absolute_error: 0.046015, acc: 0.913374, mean_q: 0.995000 4818/100000: episode: 9, duration: 16.453s, episode steps: 532, steps per second: 32, episode reward: 110.000, mean reward: 0.207 [0.000, 10.000], mean action: 2.994 [0.000, 6.000], mean observation: 72.854 [0.000, 228.000], loss: 1.269515, mean_absolute_error: 0.045624, acc: 0.919290, mean_q: 0.996452 5240/100000: episode: 10, duration: 13.077s, episode steps: 422, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.012 [2.000, 8.000], mean observation: 72.910 [0.000, 228.000], loss: 1.165863, mean_absolute_error: 0.041262, acc: 0.929354, mean_q: 0.997315 5660/100000: episode: 11, duration: 12.973s, episode steps: 420, steps per second: 32, episode reward: 60.000, mean reward: 0.143 [0.000, 10.000], mean action: 3.007 [0.000, 8.000], mean observation: 72.908 [0.000, 228.000], loss: 1.207981, mean_absolute_error: 0.041799, acc: 0.931696, mean_q: 0.997878 6188/100000: episode: 12, duration: 17.165s, episode steps: 528, steps per second: 31, episode reward: 110.000, mean reward: 0.208 [0.000, 10.000], mean action: 3.019 [0.000, 8.000], mean observation: 72.858 [0.000, 228.000], loss: 1.079953, mean_absolute_error: 0.037732, acc: 0.937855, mean_q: 0.998365 7318/100000: episode: 13, duration: 36.109s, episode steps: 1130, steps per second: 31, episode reward: 180.000, mean reward: 0.159 [0.000, 50.000], mean action: 3.014 [1.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 1.126516, mean_absolute_error: 0.036324, acc: 0.942893, mean_q: 0.998921 7749/100000: 
episode: 14, duration: 13.283s, episode steps: 431, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.021 [3.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 1.242681, mean_absolute_error: 0.034535, acc: 0.947288, mean_q: 0.999283 8193/100000: episode: 15, duration: 13.465s, episode steps: 444, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 1.276073, mean_absolute_error: 0.034827, acc: 0.949747, mean_q: 0.999426 8626/100000: episode: 16, duration: 13.100s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.912 [0.000, 228.000], loss: 1.219086, mean_absolute_error: 0.032687, acc: 0.953161, mean_q: 0.999536 9058/100000: episode: 17, duration: 13.008s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.014 [1.000, 8.000], mean observation: 72.917 [0.000, 228.000], loss: 0.966979, mean_absolute_error: 0.031524, acc: 0.955295, mean_q: 0.999627 9568/100000: episode: 18, duration: 15.549s, episode steps: 510, steps per second: 33, episode reward: 110.000, mean reward: 0.216 [0.000, 10.000], mean action: 2.994 [1.000, 4.000], mean observation: 72.859 [0.000, 228.000], loss: 0.906498, mean_absolute_error: 0.029293, acc: 0.959559, mean_q: 0.999698 9996/100000: episode: 19, duration: 13.345s, episode steps: 428, steps per second: 32, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.993 [0.000, 5.000], mean observation: 72.908 [0.000, 228.000], loss: 0.958007, mean_absolute_error: 0.029338, acc: 0.957579, mean_q: 0.999756 10419/100000: episode: 20, duration: 13.323s, episode steps: 423, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.005 [0.000, 8.000], mean 
observation: 72.906 [0.000, 228.000], loss: 1.070048, mean_absolute_error: 0.031253, acc: 0.959146, mean_q: 0.999802 10851/100000: episode: 21, duration: 13.485s, episode steps: 432, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.002 [2.000, 5.000], mean observation: 72.909 [0.000, 228.000], loss: 1.149229, mean_absolute_error: 0.031137, acc: 0.961010, mean_q: 0.999839 11429/100000: episode: 22, duration: 17.292s, episode steps: 578, steps per second: 33, episode reward: 110.000, mean reward: 0.190 [0.000, 10.000], mean action: 3.012 [0.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 1.168064, mean_absolute_error: 0.029637, acc: 0.962695, mean_q: 0.999876 11944/100000: episode: 23, duration: 15.214s, episode steps: 515, steps per second: 34, episode reward: 110.000, mean reward: 0.214 [0.000, 10.000], mean action: 2.992 [0.000, 4.000], mean observation: 72.853 [0.000, 228.000], loss: 1.067216, mean_absolute_error: 0.030874, acc: 0.961650, mean_q: 0.999906 12384/100000: episode: 24, duration: 13.070s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.935310, mean_absolute_error: 0.026834, acc: 0.966264, mean_q: 0.999924 12965/100000: episode: 25, duration: 17.462s, episode steps: 581, steps per second: 33, episode reward: 70.000, mean reward: 0.120 [0.000, 10.000], mean action: 3.014 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 1.057993, mean_absolute_error: 0.028709, acc: 0.966652, mean_q: 0.999940 13391/100000: episode: 26, duration: 12.881s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.865644, mean_absolute_error: 0.026880, acc: 0.966623, mean_q: 0.999953 13919/100000: episode: 27, duration: 15.738s, 
episode steps: 528, steps per second: 34, episode reward: 110.000, mean reward: 0.208 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.854 [0.000, 228.000], loss: 1.009845, mean_absolute_error: 0.028659, acc: 0.966974, mean_q: 0.999963 14350/100000: episode: 28, duration: 12.788s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 0.837868, mean_absolute_error: 0.025444, acc: 0.970780, mean_q: 0.999970 14775/100000: episode: 29, duration: 12.599s, episode steps: 425, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.012 [3.000, 6.000], mean observation: 72.913 [0.000, 228.000], loss: 0.885915, mean_absolute_error: 0.027345, acc: 0.966765, mean_q: 0.999976 15202/100000: episode: 30, duration: 12.630s, episode steps: 427, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [0.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 1.067898, mean_absolute_error: 0.027376, acc: 0.970653, mean_q: 0.999980 15628/100000: episode: 31, duration: 12.637s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.991 [1.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 0.926451, mean_absolute_error: 0.027283, acc: 0.971097, mean_q: 0.999984 16057/100000: episode: 32, duration: 12.722s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.804952, mean_absolute_error: 0.024675, acc: 0.971372, mean_q: 0.999987 16588/100000: episode: 33, duration: 15.744s, episode steps: 531, steps per second: 34, episode reward: 110.000, mean reward: 0.207 [0.000, 10.000], mean action: 3.013 [2.000, 8.000], mean observation: 72.854 
[0.000, 228.000], loss: 0.925594, mean_absolute_error: 0.027047, acc: 0.972575, mean_q: 0.999990 17105/100000: episode: 34, duration: 15.288s, episode steps: 517, steps per second: 34, episode reward: 60.000, mean reward: 0.116 [0.000, 10.000], mean action: 3.008 [3.000, 5.000], mean observation: 72.920 [0.000, 228.000], loss: 0.892844, mean_absolute_error: 0.025035, acc: 0.971893, mean_q: 0.999992 17536/100000: episode: 35, duration: 12.722s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.023 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.973951, mean_absolute_error: 0.025876, acc: 0.975348, mean_q: 0.999994 17974/100000: episode: 36, duration: 13.189s, episode steps: 438, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.910 [0.000, 228.000], loss: 0.845903, mean_absolute_error: 0.023325, acc: 0.974244, mean_q: 0.999995 18405/100000: episode: 37, duration: 12.907s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.005 [0.000, 8.000], mean observation: 72.917 [0.000, 228.000], loss: 0.885737, mean_absolute_error: 0.025633, acc: 0.974985, mean_q: 0.999996 18834/100000: episode: 38, duration: 12.750s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.005 [2.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.966934, mean_absolute_error: 0.028101, acc: 0.971882, mean_q: 0.999997 19299/100000: episode: 39, duration: 13.883s, episode steps: 465, steps per second: 33, episode reward: 110.000, mean reward: 0.237 [0.000, 10.000], mean action: 3.004 [1.000, 6.000], mean observation: 72.894 [0.000, 228.000], loss: 0.950698, mean_absolute_error: 0.025855, acc: 0.973790, mean_q: 0.999997 19725/100000: episode: 40, duration: 12.708s, episode steps: 426, steps 
per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.016 [3.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 0.934569, mean_absolute_error: 0.025027, acc: 0.975719, mean_q: 0.999998 20161/100000: episode: 41, duration: 12.911s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.966219, mean_absolute_error: 0.025600, acc: 0.975989, mean_q: 0.999998 20590/100000: episode: 42, duration: 12.721s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.912 [0.000, 228.000], loss: 0.831869, mean_absolute_error: 0.024158, acc: 0.976034, mean_q: 0.999999 21022/100000: episode: 43, duration: 12.844s, episode steps: 432, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.021 [3.000, 8.000], mean observation: 72.912 [0.000, 228.000], loss: 1.055637, mean_absolute_error: 0.027234, acc: 0.977937, mean_q: 0.999999 21455/100000: episode: 44, duration: 12.819s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.021 [3.000, 7.000], mean observation: 72.909 [0.000, 228.000], loss: 0.836770, mean_absolute_error: 0.023797, acc: 0.978493, mean_q: 0.999999 21889/100000: episode: 45, duration: 12.940s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.912 [0.000, 228.000], loss: 0.836639, mean_absolute_error: 0.023856, acc: 0.977823, mean_q: 0.999999 22343/100000: episode: 46, duration: 13.496s, episode steps: 454, steps per second: 34, episode reward: 110.000, mean reward: 0.242 [0.000, 10.000], mean action: 3.009 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 
0.805641, mean_absolute_error: 0.023271, acc: 0.977905, mean_q: 0.999999 22777/100000: episode: 47, duration: 12.854s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [0.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 1.006345, mean_absolute_error: 0.026670, acc: 0.974942, mean_q: 1.000000 23214/100000: episode: 48, duration: 12.924s, episode steps: 437, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.844489, mean_absolute_error: 0.023875, acc: 0.979334, mean_q: 1.000000 23641/100000: episode: 49, duration: 12.627s, episode steps: 427, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.991 [1.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.983037, mean_absolute_error: 0.026869, acc: 0.979435, mean_q: 1.000000 24067/100000: episode: 50, duration: 12.623s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.009 [3.000, 6.000], mean observation: 72.912 [0.000, 228.000], loss: 0.934281, mean_absolute_error: 0.025823, acc: 0.979020, mean_q: 1.000000 24494/100000: episode: 51, duration: 12.634s, episode steps: 427, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.988 [0.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.891939, mean_absolute_error: 0.025250, acc: 0.977532, mean_q: 1.000000 24932/100000: episode: 52, duration: 12.988s, episode steps: 438, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.876506, mean_absolute_error: 0.024347, acc: 0.979880, mean_q: 1.000000 25362/100000: episode: 53, duration: 12.794s, episode steps: 430, steps per second: 34, episode 
reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.817384, mean_absolute_error: 0.022920, acc: 0.980959, mean_q: 1.000000 25784/100000: episode: 54, duration: 12.561s, episode steps: 422, steps per second: 34, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.002 [1.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 0.859725, mean_absolute_error: 0.023460, acc: 0.982968, mean_q: 1.000000 26222/100000: episode: 55, duration: 12.969s, episode steps: 438, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.877860, mean_absolute_error: 0.020972, acc: 0.981949, mean_q: 1.000000 26654/100000: episode: 56, duration: 12.859s, episode steps: 432, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 0.893530, mean_absolute_error: 0.025140, acc: 0.978516, mean_q: 1.000000 27184/100000: episode: 57, duration: 15.685s, episode steps: 530, steps per second: 34, episode reward: 110.000, mean reward: 0.208 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.849 [0.000, 228.000], loss: 0.871484, mean_absolute_error: 0.024115, acc: 0.980837, mean_q: 1.000000 27647/100000: episode: 58, duration: 13.676s, episode steps: 463, steps per second: 34, episode reward: 110.000, mean reward: 0.238 [0.000, 10.000], mean action: 2.998 [1.000, 4.000], mean observation: 72.896 [0.000, 228.000], loss: 0.882798, mean_absolute_error: 0.022712, acc: 0.981776, mean_q: 1.000000 28083/100000: episode: 59, duration: 12.928s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.912 [0.000, 228.000], loss: 0.924782, mean_absolute_error: 
0.024951, acc: 0.982870, mean_q: 1.000000 28515/100000: episode: 60, duration: 13.170s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [1.000, 5.000], mean observation: 72.909 [0.000, 228.000], loss: 0.950630, mean_absolute_error: 0.025596, acc: 0.982784, mean_q: 1.000000 28957/100000: episode: 61, duration: 13.095s, episode steps: 442, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 1.017386, mean_absolute_error: 0.025839, acc: 0.980345, mean_q: 1.000000 29391/100000: episode: 62, duration: 12.831s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.917 [0.000, 228.000], loss: 0.819406, mean_absolute_error: 0.023005, acc: 0.980703, mean_q: 1.000000 29824/100000: episode: 63, duration: 12.812s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 0.993344, mean_absolute_error: 0.026588, acc: 0.982318, mean_q: 1.000000 30267/100000: episode: 64, duration: 13.512s, episode steps: 443, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 2.989 [0.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.812653, mean_absolute_error: 0.022780, acc: 0.980954, mean_q: 1.000000 30707/100000: episode: 65, duration: 13.171s, episode steps: 440, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.907 [0.000, 228.000], loss: 0.804780, mean_absolute_error: 0.023003, acc: 0.979119, mean_q: 1.000000 31137/100000: episode: 66, duration: 12.918s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 
0.140 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.958653, mean_absolute_error: 0.022369, acc: 0.983358, mean_q: 1.000000 31567/100000: episode: 67, duration: 13.041s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.842303, mean_absolute_error: 0.023308, acc: 0.981904, mean_q: 1.000000 31999/100000: episode: 68, duration: 12.833s, episode steps: 432, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.969986, mean_absolute_error: 0.024267, acc: 0.983362, mean_q: 1.000000 32434/100000: episode: 69, duration: 12.959s, episode steps: 435, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.018 [3.000, 7.000], mean observation: 72.910 [0.000, 228.000], loss: 0.943249, mean_absolute_error: 0.023929, acc: 0.981968, mean_q: 1.000000 32860/100000: episode: 70, duration: 12.618s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.005 [1.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 0.853826, mean_absolute_error: 0.022960, acc: 0.984815, mean_q: 1.000000 33301/100000: episode: 71, duration: 13.135s, episode steps: 441, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.945814, mean_absolute_error: 0.023591, acc: 0.983985, mean_q: 1.000000 33738/100000: episode: 72, duration: 12.972s, episode steps: 437, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.995 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.853012, mean_absolute_error: 0.023421, acc: 0.982194, 
mean_q: 1.000000 34169/100000: episode: 73, duration: 12.839s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.002 [0.000, 7.000], mean observation: 72.910 [0.000, 228.000], loss: 0.837158, mean_absolute_error: 0.022482, acc: 0.985426, mean_q: 1.000000 34595/100000: episode: 74, duration: 12.850s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.005 [2.000, 6.000], mean observation: 72.906 [0.000, 228.000], loss: 0.903527, mean_absolute_error: 0.022658, acc: 0.983715, mean_q: 1.000000 35022/100000: episode: 75, duration: 12.748s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.991 [0.000, 4.000], mean observation: 72.911 [0.000, 228.000], loss: 0.792673, mean_absolute_error: 0.021522, acc: 0.984778, mean_q: 1.000000 35448/100000: episode: 76, duration: 12.698s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [0.000, 6.000], mean observation: 72.908 [0.000, 228.000], loss: 0.867522, mean_absolute_error: 0.023318, acc: 0.983788, mean_q: 1.000000 35875/100000: episode: 77, duration: 12.782s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.835425, mean_absolute_error: 0.021034, acc: 0.984265, mean_q: 1.000000 36306/100000: episode: 78, duration: 12.901s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.007 [3.000, 5.000], mean observation: 72.917 [0.000, 228.000], loss: 0.875452, mean_absolute_error: 0.022229, acc: 0.982961, mean_q: 1.000000 36740/100000: episode: 79, duration: 12.985s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean 
action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.947098, mean_absolute_error: 0.025119, acc: 0.984087, mean_q: 1.000000 37166/100000: episode: 80, duration: 12.678s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.748542, mean_absolute_error: 0.020739, acc: 0.984522, mean_q: 1.000000 37612/100000: episode: 81, duration: 13.315s, episode steps: 446, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 2.996 [1.000, 3.000], mean observation: 72.920 [0.000, 228.000], loss: 0.752258, mean_absolute_error: 0.020643, acc: 0.984725, mean_q: 1.000000 38046/100000: episode: 82, duration: 12.966s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.746647, mean_absolute_error: 0.020482, acc: 0.985383, mean_q: 1.000000 38482/100000: episode: 83, duration: 12.969s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.007 [2.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.842648, mean_absolute_error: 0.022776, acc: 0.984088, mean_q: 1.000000 38998/100000: episode: 84, duration: 15.467s, episode steps: 516, steps per second: 33, episode reward: 110.000, mean reward: 0.213 [0.000, 10.000], mean action: 3.010 [0.000, 8.000], mean observation: 72.853 [0.000, 228.000], loss: 0.791431, mean_absolute_error: 0.021996, acc: 0.982316, mean_q: 1.000000 39430/100000: episode: 85, duration: 12.932s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.770487, mean_absolute_error: 0.021347, acc: 0.983507, mean_q: 1.000000 39870/100000: 
episode: 86, duration: 13.094s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.912 [0.000, 228.000], loss: 0.851223, mean_absolute_error: 0.022746, acc: 0.985653, mean_q: 1.000000 40298/100000: episode: 87, duration: 12.801s, episode steps: 428, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.866558, mean_absolute_error: 0.023239, acc: 0.984959, mean_q: 1.000000 40738/100000: episode: 88, duration: 13.130s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.913 [0.000, 228.000], loss: 0.814577, mean_absolute_error: 0.021974, acc: 0.985369, mean_q: 1.000000 41180/100000: episode: 89, duration: 13.200s, episode steps: 442, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.993 [1.000, 6.000], mean observation: 72.913 [0.000, 228.000], loss: 0.791810, mean_absolute_error: 0.021386, acc: 0.985506, mean_q: 1.000000 41702/100000: episode: 90, duration: 15.572s, episode steps: 522, steps per second: 34, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.853 [0.000, 228.000], loss: 0.927495, mean_absolute_error: 0.024034, acc: 0.987428, mean_q: 1.000000 42138/100000: episode: 91, duration: 13.054s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.918 [0.000, 228.000], loss: 0.935920, mean_absolute_error: 0.023190, acc: 0.985163, mean_q: 1.000000 42568/100000: episode: 92, duration: 12.831s, episode steps: 430, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], 
mean observation: 72.907 [0.000, 228.000], loss: 0.749676, mean_absolute_error: 0.020661, acc: 0.984448, mean_q: 1.000000 43039/100000: episode: 93, duration: 13.984s, episode steps: 471, steps per second: 34, episode reward: 110.000, mean reward: 0.234 [0.000, 10.000], mean action: 3.011 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.832054, mean_absolute_error: 0.022190, acc: 0.985801, mean_q: 1.000000 43473/100000: episode: 94, duration: 12.932s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.023 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.718739, mean_absolute_error: 0.019606, acc: 0.986679, mean_q: 1.000000 43904/100000: episode: 95, duration: 12.903s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.790879, mean_absolute_error: 0.020783, acc: 0.988399, mean_q: 1.000000 44329/100000: episode: 96, duration: 12.874s, episode steps: 425, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.912065, mean_absolute_error: 0.022168, acc: 0.987132, mean_q: 1.000000 44763/100000: episode: 97, duration: 13.024s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [2.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.837879, mean_absolute_error: 0.020969, acc: 0.984375, mean_q: 1.000000 45290/100000: episode: 98, duration: 15.742s, episode steps: 527, steps per second: 33, episode reward: 110.000, mean reward: 0.209 [0.000, 10.000], mean action: 3.009 [3.000, 8.000], mean observation: 72.863 [0.000, 228.000], loss: 0.899594, mean_absolute_error: 0.023656, acc: 0.986717, mean_q: 1.000000 45714/100000: episode: 99, duration: 13.066s, 
episode steps: 424, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.913 [0.000, 228.000], loss: 0.795141, mean_absolute_error: 0.020977, acc: 0.988060, mean_q: 1.000000 46150/100000: episode: 100, duration: 13.063s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.718090, mean_absolute_error: 0.019679, acc: 0.985737, mean_q: 1.000000 46580/100000: episode: 101, duration: 12.913s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 0.697030, mean_absolute_error: 0.019125, acc: 0.986773, mean_q: 1.000000 47013/100000: episode: 102, duration: 13.298s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.739714, mean_absolute_error: 0.019914, acc: 0.986937, mean_q: 1.000000 47448/100000: episode: 103, duration: 13.779s, episode steps: 435, steps per second: 32, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.738085, mean_absolute_error: 0.020341, acc: 0.984770, mean_q: 1.000000 47889/100000: episode: 104, duration: 13.802s, episode steps: 441, steps per second: 32, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.832484, mean_absolute_error: 0.021954, acc: 0.986820, mean_q: 1.000000 48319/100000: episode: 105, duration: 12.800s, episode steps: 430, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.909 
[0.000, 228.000], loss: 0.798077, mean_absolute_error: 0.021110, acc: 0.987718, mean_q: 1.000000 48752/100000: episode: 106, duration: 12.868s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.888920, mean_absolute_error: 0.021588, acc: 0.987009, mean_q: 1.000000 49181/100000: episode: 107, duration: 12.792s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.817413, mean_absolute_error: 0.021793, acc: 0.986233, mean_q: 1.000000 49614/100000: episode: 108, duration: 12.875s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.014 [3.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.804071, mean_absolute_error: 0.021243, acc: 0.987803, mean_q: 1.000000 50042/100000: episode: 109, duration: 13.319s, episode steps: 428, steps per second: 32, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.796233, mean_absolute_error: 0.021130, acc: 0.987077, mean_q: 1.000000 50472/100000: episode: 110, duration: 13.205s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 6.000], mean observation: 72.909 [0.000, 228.000], loss: 0.958833, mean_absolute_error: 0.023197, acc: 0.987064, mean_q: 1.000000 51182/100000: episode: 111, duration: 21.496s, episode steps: 710, steps per second: 33, episode reward: 780.000, mean reward: 1.099 [0.000, 400.000], mean action: 3.023 [3.000, 8.000], mean observation: 72.878 [0.000, 228.000], loss: 1.750635, mean_absolute_error: 0.022866, acc: 0.987192, mean_q: 1.000000 51616/100000: episode: 112, duration: 12.981s, episode steps: 
434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 2.289796, mean_absolute_error: 0.022587, acc: 0.986247, mean_q: 1.000000 52052/100000: episode: 113, duration: 13.128s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.018 [3.000, 7.000], mean observation: 72.909 [0.000, 228.000], loss: 6.869392, mean_absolute_error: 0.026987, acc: 0.988174, mean_q: 1.000000 52486/100000: episode: 114, duration: 12.987s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.911 [0.000, 228.000], loss: 0.908648, mean_absolute_error: 0.021901, acc: 0.987975, mean_q: 1.000000 53028/100000: episode: 115, duration: 16.227s, episode steps: 542, steps per second: 33, episode reward: 110.000, mean reward: 0.203 [0.000, 10.000], mean action: 3.015 [1.000, 8.000], mean observation: 72.861 [0.000, 228.000], loss: 0.927577, mean_absolute_error: 0.021164, acc: 0.988872, mean_q: 1.000000 53549/100000: episode: 116, duration: 15.605s, episode steps: 521, steps per second: 33, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.854 [0.000, 228.000], loss: 6.778060, mean_absolute_error: 0.024810, acc: 0.987224, mean_q: 1.000000 53976/100000: episode: 117, duration: 12.945s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.895132, mean_absolute_error: 0.023102, acc: 0.988876, mean_q: 1.000000 54417/100000: episode: 118, duration: 13.326s, episode steps: 441, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.907 [0.000, 
228.000], loss: 0.815021, mean_absolute_error: 0.021397, acc: 0.987883, mean_q: 1.000000 54848/100000: episode: 119, duration: 12.849s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 6.555128, mean_absolute_error: 0.023074, acc: 0.989342, mean_q: 1.000000 55278/100000: episode: 120, duration: 12.983s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.788291, mean_absolute_error: 0.020740, acc: 0.988517, mean_q: 1.000000 55712/100000: episode: 121, duration: 13.138s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.905 [0.000, 228.000], loss: 0.701904, mean_absolute_error: 0.018827, acc: 0.988767, mean_q: 1.000000 56144/100000: episode: 122, duration: 12.976s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.016 [3.000, 8.000], mean observation: 72.912 [0.000, 228.000], loss: 6.674213, mean_absolute_error: 0.026572, acc: 0.986400, mean_q: 1.000000 56584/100000: episode: 123, duration: 13.134s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 6.481470, mean_absolute_error: 0.024026, acc: 0.989205, mean_q: 1.000000 57010/100000: episode: 124, duration: 12.779s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 2.567588, mean_absolute_error: 0.024750, acc: 0.987089, mean_q: 1.000000 57440/100000: episode: 125, duration: 12.885s, episode steps: 430, steps 
per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [0.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 2.430678, mean_absolute_error: 0.023412, acc: 0.988154, mean_q: 1.000000 57878/100000: episode: 126, duration: 13.035s, episode steps: 438, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.009 [3.000, 5.000], mean observation: 72.909 [0.000, 228.000], loss: 2.234050, mean_absolute_error: 0.021098, acc: 0.989155, mean_q: 1.000000 58316/100000: episode: 127, duration: 13.131s, episode steps: 438, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 1.070799, mean_absolute_error: 0.022651, acc: 0.986658, mean_q: 1.000000 58750/100000: episode: 128, duration: 13.015s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.919 [0.000, 228.000], loss: 3.657577, mean_absolute_error: 0.023798, acc: 0.988119, mean_q: 1.000000 59176/100000: episode: 129, duration: 12.747s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 2.296508, mean_absolute_error: 0.023142, acc: 0.989583, mean_q: 1.000000 59653/100000: episode: 130, duration: 14.280s, episode steps: 477, steps per second: 33, episode reward: 110.000, mean reward: 0.231 [0.000, 10.000], mean action: 3.010 [3.000, 8.000], mean observation: 72.867 [0.000, 228.000], loss: 0.889277, mean_absolute_error: 0.021522, acc: 0.988273, mean_q: 1.000000 60084/100000: episode: 131, duration: 12.940s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.917 [0.000, 228.000], loss: 
0.812250, mean_absolute_error: 0.021473, acc: 0.987602, mean_q: 1.000000 60510/100000: episode: 132, duration: 12.855s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 6.668528, mean_absolute_error: 0.024226, acc: 0.987969, mean_q: 1.000000 60951/100000: episode: 133, duration: 13.184s, episode steps: 441, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.011 [3.000, 6.000], mean observation: 72.905 [0.000, 228.000], loss: 6.516801, mean_absolute_error: 0.023675, acc: 0.988450, mean_q: 1.000000 61377/100000: episode: 134, duration: 12.783s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.774365, mean_absolute_error: 0.020130, acc: 0.990023, mean_q: 1.000000 61807/100000: episode: 135, duration: 12.900s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 6.000], mean observation: 72.918 [0.000, 228.000], loss: 0.730819, mean_absolute_error: 0.019347, acc: 0.988808, mean_q: 1.000000 62241/100000: episode: 136, duration: 12.967s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.739101, mean_absolute_error: 0.019514, acc: 0.989127, mean_q: 1.000000 62688/100000: episode: 137, duration: 13.364s, episode steps: 447, steps per second: 33, episode reward: 60.000, mean reward: 0.134 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.831790, mean_absolute_error: 0.021591, acc: 0.988465, mean_q: 1.000000 63121/100000: episode: 138, duration: 13.000s, episode steps: 433, steps per second: 33, 
episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 7.999557, mean_absolute_error: 0.025455, acc: 0.988380, mean_q: 1.000000 63546/100000: episode: 139, duration: 13.515s, episode steps: 425, steps per second: 31, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.910 [0.000, 228.000], loss: 6.665014, mean_absolute_error: 0.023986, acc: 0.988235, mean_q: 1.000000 63987/100000: episode: 140, duration: 13.334s, episode steps: 441, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.847174, mean_absolute_error: 0.021910, acc: 0.988662, mean_q: 1.000000 64433/100000: episode: 141, duration: 13.484s, episode steps: 446, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.004 [3.000, 5.000], mean observation: 72.907 [0.000, 228.000], loss: 0.801544, mean_absolute_error: 0.021092, acc: 0.988299, mean_q: 1.000000 64862/100000: episode: 142, duration: 12.891s, episode steps: 429, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 6.596956, mean_absolute_error: 0.023347, acc: 0.989948, mean_q: 1.000000 65301/100000: episode: 143, duration: 13.241s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 0.756605, mean_absolute_error: 0.020061, acc: 0.988468, mean_q: 1.000000 65740/100000: episode: 144, duration: 13.186s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.014 [3.000, 8.000], mean observation: 72.913 [0.000, 228.000], loss: 0.754340, 
mean_absolute_error: 0.019724, acc: 0.990248, mean_q: 1.000000 66175/100000: episode: 145, duration: 13.118s, episode steps: 435, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.998 [0.000, 5.000], mean observation: 72.912 [0.000, 228.000], loss: 13.629837, mean_absolute_error: 0.026191, acc: 0.991738, mean_q: 1.000000 66613/100000: episode: 146, duration: 13.218s, episode steps: 438, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.728253, mean_absolute_error: 0.019346, acc: 0.988870, mean_q: 1.000000 67050/100000: episode: 147, duration: 13.268s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 0.771391, mean_absolute_error: 0.020079, acc: 0.990132, mean_q: 1.000000 67487/100000: episode: 148, duration: 13.208s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 1.055708, mean_absolute_error: 0.023504, acc: 0.988630, mean_q: 1.000000 67917/100000: episode: 149, duration: 13.018s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.904 [0.000, 228.000], loss: 0.734914, mean_absolute_error: 0.019412, acc: 0.989390, mean_q: 1.000000 68349/100000: episode: 150, duration: 13.049s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.879950, mean_absolute_error: 0.020989, acc: 0.988860, mean_q: 1.000000 68776/100000: episode: 151, duration: 12.993s, episode steps: 427, steps per second: 33, episode 
reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.002 [2.000, 5.000], mean observation: 72.910 [0.000, 228.000], loss: 0.810485, mean_absolute_error: 0.021195, acc: 0.988583, mean_q: 1.000000 69200/100000: episode: 152, duration: 13.099s, episode steps: 424, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 6.769201, mean_absolute_error: 0.024035, acc: 0.989608, mean_q: 1.000000 69634/100000: episode: 153, duration: 13.483s, episode steps: 434, steps per second: 32, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 6.727956, mean_absolute_error: 0.025991, acc: 0.990351, mean_q: 1.000000 70065/100000: episode: 154, duration: 13.497s, episode steps: 431, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.831990, mean_absolute_error: 0.019917, acc: 0.988907, mean_q: 1.000000 70500/100000: episode: 155, duration: 13.260s, episode steps: 435, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.903836, mean_absolute_error: 0.021813, acc: 0.987644, mean_q: 1.000000 70924/100000: episode: 156, duration: 12.851s, episode steps: 424, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.813252, mean_absolute_error: 0.020972, acc: 0.990050, mean_q: 1.000000 71351/100000: episode: 157, duration: 12.881s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.762409, 
mean_absolute_error: 0.019593, acc: 0.991437, mean_q: 1.000000 71898/100000: episode: 158, duration: 16.541s, episode steps: 547, steps per second: 33, episode reward: 60.000, mean reward: 0.110 [0.000, 10.000], mean action: 3.004 [3.000, 5.000], mean observation: 72.901 [0.000, 228.000], loss: 5.462224, mean_absolute_error: 0.022918, acc: 0.989431, mean_q: 1.000000 72325/100000: episode: 159, duration: 12.927s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.913 [0.000, 228.000], loss: 0.815619, mean_absolute_error: 0.020754, acc: 0.991584, mean_q: 1.000000 72749/100000: episode: 160, duration: 12.806s, episode steps: 424, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.805345, mean_absolute_error: 0.020817, acc: 0.989829, mean_q: 1.000000 73176/100000: episode: 161, duration: 12.953s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 6.730695, mean_absolute_error: 0.024181, acc: 0.989095, mean_q: 1.000000 73615/100000: episode: 162, duration: 13.336s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.837817, mean_absolute_error: 0.020025, acc: 0.989749, mean_q: 1.000000 74051/100000: episode: 163, duration: 13.276s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 6.441117, mean_absolute_error: 0.021877, acc: 0.989822, mean_q: 1.000000 74488/100000: episode: 164, duration: 13.053s, episode steps: 437, steps per second: 33, episode 
reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.819825, mean_absolute_error: 0.021008, acc: 0.990346, mean_q: 1.000000 74921/100000: episode: 165, duration: 13.294s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 2.998 [0.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.865574, mean_absolute_error: 0.022025, acc: 0.990257, mean_q: 1.000000 75354/100000: episode: 166, duration: 13.565s, episode steps: 433, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.708961, mean_absolute_error: 0.018218, acc: 0.992350, mean_q: 1.000000 75781/100000: episode: 167, duration: 12.828s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 8.054317, mean_absolute_error: 0.024153, acc: 0.989681, mean_q: 1.000000 76295/100000: episode: 168, duration: 15.366s, episode steps: 514, steps per second: 33, episode reward: 110.000, mean reward: 0.214 [0.000, 10.000], mean action: 3.010 [3.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 5.656568, mean_absolute_error: 0.023127, acc: 0.990333, mean_q: 1.000000 76710/100000: episode: 169, duration: 12.637s, episode steps: 415, steps per second: 33, episode reward: 60.000, mean reward: 0.145 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.841335, mean_absolute_error: 0.019893, acc: 0.990361, mean_q: 1.000000 77142/100000: episode: 170, duration: 13.082s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.767345, 
mean_absolute_error: 0.017985, acc: 0.991753, mean_q: 1.000000 77592/100000: episode: 171, duration: 13.450s, episode steps: 450, steps per second: 33, episode reward: 110.000, mean reward: 0.244 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.899 [0.000, 228.000], loss: 0.689204, mean_absolute_error: 0.018016, acc: 0.991042, mean_q: 1.000000 78114/100000: episode: 172, duration: 15.622s, episode steps: 522, steps per second: 33, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.850 [0.000, 228.000], loss: 0.714887, mean_absolute_error: 0.018603, acc: 0.990960, mean_q: 1.000000 78544/100000: episode: 173, duration: 12.919s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.737498, mean_absolute_error: 0.019308, acc: 0.989971, mean_q: 1.000000 78975/100000: episode: 174, duration: 12.894s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.918 [0.000, 228.000], loss: 0.797951, mean_absolute_error: 0.018914, acc: 0.990502, mean_q: 1.000000 79417/100000: episode: 175, duration: 13.310s, episode steps: 442, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 12.094293, mean_absolute_error: 0.026394, acc: 0.991233, mean_q: 1.000000 79849/100000: episode: 176, duration: 12.968s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.907 [0.000, 228.000], loss: 0.746510, mean_absolute_error: 0.019266, acc: 0.991247, mean_q: 1.000000 80281/100000: episode: 177, duration: 13.057s, episode steps: 432, steps per second: 33, episode 
reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 0.878578, mean_absolute_error: 0.020813, acc: 0.990162, mean_q: 1.000000 80717/100000: episode: 178, duration: 12.996s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.747418, mean_absolute_error: 0.017835, acc: 0.990396, mean_q: 1.000000 81150/100000: episode: 179, duration: 13.030s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 2.231029, mean_absolute_error: 0.021677, acc: 0.991700, mean_q: 1.000000 81577/100000: episode: 180, duration: 12.772s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.783079, mean_absolute_error: 0.020112, acc: 0.990925, mean_q: 1.000000 82008/100000: episode: 181, duration: 12.885s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.696581, mean_absolute_error: 0.018047, acc: 0.991734, mean_q: 1.000000 82440/100000: episode: 182, duration: 13.183s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.836997, mean_absolute_error: 0.019772, acc: 0.991030, mean_q: 1.000000 82871/100000: episode: 183, duration: 13.619s, episode steps: 431, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.916 [0.000, 228.000], loss: 13.865776, 
mean_absolute_error: 0.028900, acc: 0.991372, mean_q: 1.000000 83293/100000: episode: 184, duration: 12.943s, episode steps: 422, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 2.995 [0.000, 4.000], mean observation: 72.904 [0.000, 228.000], loss: 0.801700, mean_absolute_error: 0.020705, acc: 0.989929, mean_q: 1.000000 83729/100000: episode: 185, duration: 13.499s, episode steps: 436, steps per second: 32, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 6.508937, mean_absolute_error: 0.023269, acc: 0.990181, mean_q: 1.000000 84173/100000: episode: 186, duration: 13.531s, episode steps: 444, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.848594, mean_absolute_error: 0.019877, acc: 0.991695, mean_q: 1.000000 84615/100000: episode: 187, duration: 13.393s, episode steps: 442, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.853347, mean_absolute_error: 0.021670, acc: 0.990950, mean_q: 1.000000 85049/100000: episode: 188, duration: 13.252s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 6.567101, mean_absolute_error: 0.023859, acc: 0.991215, mean_q: 1.000000 85488/100000: episode: 189, duration: 13.414s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 12.108674, mean_absolute_error: 0.024965, acc: 0.991315, mean_q: 1.000000 85917/100000: episode: 190, duration: 13.049s, episode steps: 429, steps per second: 33, episode 
reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.760346, mean_absolute_error: 0.019699, acc: 0.989948, mean_q: 1.000000 86352/100000: episode: 191, duration: 13.272s, episode steps: 435, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.912 [0.000, 228.000], loss: 0.691576, mean_absolute_error: 0.017892, acc: 0.991882, mean_q: 1.000000 86777/100000: episode: 192, duration: 12.884s, episode steps: 425, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.688200, mean_absolute_error: 0.018250, acc: 0.989853, mean_q: 1.000000 87199/100000: episode: 193, duration: 12.817s, episode steps: 422, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.798901, mean_absolute_error: 0.020241, acc: 0.992595, mean_q: 1.000000 87631/100000: episode: 194, duration: 13.218s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.758069, mean_absolute_error: 0.017800, acc: 0.991536, mean_q: 1.000000 88076/100000: episode: 195, duration: 13.552s, episode steps: 445, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.907 [0.000, 228.000], loss: 0.755982, mean_absolute_error: 0.019846, acc: 0.989045, mean_q: 1.000000 88513/100000: episode: 196, duration: 13.312s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.821040, 
mean_absolute_error: 0.020977, acc: 0.990775, mean_q: 1.000000 88975/100000: episode: 197, duration: 14.064s, episode steps: 462, steps per second: 33, episode reward: 110.000, mean reward: 0.238 [0.000, 10.000], mean action: 3.000 [2.000, 4.000], mean observation: 72.892 [0.000, 228.000], loss: 0.816265, mean_absolute_error: 0.019451, acc: 0.990530, mean_q: 1.000000 89401/100000: episode: 198, duration: 12.976s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.912 [0.000, 228.000], loss: 0.783816, mean_absolute_error: 0.020237, acc: 0.990244, mean_q: 1.000000 89827/100000: episode: 199, duration: 12.980s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 3.702346, mean_absolute_error: 0.023155, acc: 0.990390, mean_q: 1.000000 90270/100000: episode: 200, duration: 13.735s, episode steps: 443, steps per second: 32, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.020 [3.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 6.430346, mean_absolute_error: 0.023296, acc: 0.991112, mean_q: 1.000000 90691/100000: episode: 201, duration: 12.984s, episode steps: 421, steps per second: 32, episode reward: 60.000, mean reward: 0.143 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 6.711028, mean_absolute_error: 0.023351, acc: 0.990202, mean_q: 1.000000 91130/100000: episode: 202, duration: 14.014s, episode steps: 439, steps per second: 31, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.857046, mean_absolute_error: 0.020169, acc: 0.991173, mean_q: 1.000000 91578/100000: episode: 203, duration: 13.709s, episode steps: 448, steps per second: 33, episode 
reward: 60.000, mean reward: 0.134 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 7.718023, mean_absolute_error: 0.023832, acc: 0.991211, mean_q: 1.000000 92011/100000: episode: 204, duration: 13.224s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.776836, mean_absolute_error: 0.018400, acc: 0.990834, mean_q: 1.000000 done, took 2826.234 seconds
<keras.callbacks.History at 0x13853a208>
## Plot mean_reward
# Mean reward per episode, hand-copied from the DQN training log above
# (66 episodes shown, dashed red line).
episodes = [*range(66)]
mean_reward = [
    0.401, 0.139, 0.137, 0.237, 0.109, 0.212, 0.211, 0.207, 0.142, 0.143, 0.208,
    0.159, 0.139, 0.139, 0.216, 0.140, 0.142, 0.139, 0.190, 0.214, 0.136, 0.120,
    0.141, 0.208, 0.139, 0.141, 0.141, 0.141, 0.140, 0.207, 0.116, 0.139, 0.140,
    0.237, 0.141, 0.138, 0.140, 0.139, 0.138, 0.242, 0.138, 0.137, 0.141, 0.141,
    0.137, 0.140, 0.142, 0.137, 0.139, 0.208, 0.238, 0.138, 0.139, 0.136, 0.138,
    0.139, 0.135, 0.136, 0.140, 0.140, 0.139, 0.138, 0.141, 0.136, 0.137, 0.139,
]
plt.plot(episodes, mean_reward, 'r--')
plt.axis([0, 70, 0, 4])
plt.show()
## Plot loss and accuracy
# NOTE(review): only the loss curve is plotted here; accuracy values were not
# transcribed. Loss values are hand-copied from the training log above,
# one entry per episode (150 episodes total; confirmed by len(loss) below).
episodes = list(range(0, 150))
loss = [2.750,1.9326,1.599,1.4933,1.3641,1.3544,1.2858,1.2228,1.2695,1.165,1.2079,
1.0799,1.1265,1.242,1.2760,1.219,0.966,0.9580,1.0700,1.1492,1.1680,1.0672,
0.9353,1.0579,0.8656,1.0098,0.837,0.8859,1.0678,0.9264,0.8049,0.9255,0.8928,
0.9739,0.8459,0.8857,0.9969,0.9506,0.9345,0.960219,0.831869,1.0556, 0.8367,
0.8366,0.8056,1.006,0.8444,0.983,0.9342,0.8919,0.8765,0.8173,0.8173,0.8597,
0.877,0.8935,0.8714,0.8827,0.9247,0.9506,1.0173,0.8194,0.9933,0.8126,0.8047,
0.9586,0.8423,0.969,0.9432,0.8538,0.9458,0.8530,0.8371,0.9035,0.7926,0.8675,
0.8354,0.8754, 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,
0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,0.7918,0.9274,
0.7951, 0.7180,0.6970,0.7397, 0.7380,0.8324,0.7980,0.8889, 0.8174, 0.8040,0.796,
0.9588, 1.750, 2.289, 6.869,0.908,0.927,6.778,0.895,6.555,0.788,0.701,6.674,2.567,
2.43,2.234,1.070,3.657,2.296,0.889,0.8122,6.668,6.516,0.774, 0.730,0.739,0.831,7.99,
6.665,0.847,0.801,6.596,0.756,0.754,13.629,0.728,0.771,1.055,0.734,0.879,0.810,6.769]
# Dashed red line; y-axis clipped at 12 because of late-training loss spikes.
plt.plot(episodes, loss, 'r--')
plt.axis([0, 150, 0, 12])
plt.show()
# Sanity check: the transcription above must contain exactly 150 entries.
len(loss)
150
## Plot loss for the first 100 episodes; loss is decreasing and accuracy is
## growing during the first 100 episodes.
# The first 100 loss entries of the Boltzmann-policy run (hand-copied from
# the training log; a prefix of the 150-entry `loss` list above).
episodes = list(range(0, 100))
loss_Bolzman = [2.750,1.9326,1.599,1.4933,1.3641,1.3544,1.2858,1.2228,1.2695,1.165,1.2079,
1.0799,1.1265,1.242,1.2760,1.219,0.966,0.9580,1.0700,1.1492,1.1680,1.0672,
0.9353,1.0579,0.8656,1.0098,0.837,0.8859,1.0678,0.9264,0.8049,0.9255,0.8928,
0.9739,0.8459,0.8857,0.9969,0.9506,0.9345,0.960219,0.831869,1.0556, 0.8367,
0.8366,0.8056,1.006,0.8444,0.983,0.9342,0.8919,0.8765,0.8173,0.8173,0.8597,
0.877,0.8935,0.8714,0.8827,0.9247,0.9506,1.0173,0.8194,0.9933,0.8126,0.8047,
0.9586,0.8423,0.969,0.9432,0.8538,0.9458,0.8530,0.8371,0.9035,0.7926,0.8675,
0.8354,0.8754, 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,
0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,0.7918,0.9274,
0.7951, 0.7180]
# Plot the 100-episode Boltzmann loss curve (dashed red line).
plt.plot(episodes, loss_Bolzman, 'r--')
plt.axis([0, 110, 0, 4])
plt.show()
# BUG FIX: this previously read len(loss) — the full 150-entry list — but the
# list plotted in this cell is loss_Bolzman, whose length (100) matches the
# recorded notebook output below.
len(loss_Bolzman)
100
# Evaluate the trained DQN agent over 10 episodes with on-screen rendering.
evaluation_episodes = 10
dqn.test(env, nb_episodes=evaluation_episodes, visualize=True)
Testing for 10 episodes ... Episode 1: reward: 210.000, steps: 488 Episode 2: reward: 210.000, steps: 502 Episode 3: reward: 210.000, steps: 490 Episode 4: reward: 210.000, steps: 499 Episode 5: reward: 210.000, steps: 499 Episode 6: reward: 210.000, steps: 507 Episode 7: reward: 210.000, steps: 502 Episode 8: reward: 210.000, steps: 508 Episode 9: reward: 210.000, steps: 498 Episode 10: reward: 210.000, steps: 490
<keras.callbacks.History at 0x189f56588>
## Save the weights of the agent to an HDF5 file.
# BUG FIX: the filename was built with format(env), which embeds the env
# object's repr (e.g. "<TimeLimit<AtariEnv ...>>") into the path and produces
# an unusable filename. Use the environment id instead.
weights_filename = 'dqn_{}_weights.h5f'.format(env.spec.id)
dqn.save_weights(weights_filename, overwrite=True)
## Load the weights of the agent from the same HDF5 file.
# BUG FIX: load_weights was given the unformatted template
# 'dqn_{}_weights.h5f', which can never match the file saved above.
dqn.load_weights(weights_filename)
## Information about the agent (https://github.com/matthiasplappert/keras-rl/blob/master/rl/core.py)
# BUG FIX: metrics_names and layers are properties on keras-rl agents, not
# methods; calling them raised TypeError. Access them as attributes.
print(dqn.metrics_names)
print(dqn.layers)
# SARSA Agent -- on-policy TD control.
# NOTE(review): passing policy=None does NOT mean "no policy"; keras-rl
# substitutes its default policy internally — confirm against rl/agents/sarsa.py.
from rl.agents.sarsa import SARSAAgent

sarsa = SARSAAgent(
    model,
    nb_actions,
    policy=None,
    test_policy=None,
    gamma=0.99,
    nb_steps_warmup=10,
    train_interval=1,
)
# Compile with Adam (lr=1e-3), tracking MAE and accuracy during training.
sarsa.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])
# Train for 100k environment steps with rendering, then evaluate 10 episodes.
sarsa.fit(env, nb_steps=100000, visualize=True, verbose=2)
sarsa.test(env, nb_episodes=10, visualize=True)
Training for 100000 steps ... 966/100000: episode: 1, duration: 19.852s, episode steps: 966, steps per second: 49, episode reward: 440.000, mean reward: 0.455 [0.000, 200.000], mean action: 7.621 [0.000, 8.000], mean observation: 72.770 [0.000, 228.000], loss: 23.349495, mean_absolute_error: 0.133976, acc: 0.910995, mean_q: 0.321679 2220/100000: episode: 2, duration: 20.781s, episode steps: 1254, steps per second: 60, episode reward: 440.000, mean reward: 0.351 [0.000, 200.000], mean action: 7.652 [0.000, 8.000], mean observation: 72.743 [0.000, 228.000], loss: 17.760113, mean_absolute_error: 0.094084, acc: 0.919393, mean_q: 0.646693 3085/100000: episode: 3, duration: 7.325s, episode steps: 865, steps per second: 118, episode reward: 460.000, mean reward: 0.532 [0.000, 200.000], mean action: 7.603 [0.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 25.900519, mean_absolute_error: 0.109459, acc: 0.906250, mean_q: 0.724962 3920/100000: episode: 4, duration: 6.853s, episode steps: 835, steps per second: 122, episode reward: 480.000, mean reward: 0.575 [0.000, 200.000], mean action: 7.686 [0.000, 8.000], mean observation: 72.701 [0.000, 228.000], loss: 26.829130, mean_absolute_error: 0.104061, acc: 0.926859, mean_q: 0.794797 4924/100000: episode: 5, duration: 8.052s, episode steps: 1004, steps per second: 125, episode reward: 240.000, mean reward: 0.239 [0.000, 50.000], mean action: 7.601 [0.000, 8.000], mean observation: 72.781 [0.000, 228.000], loss: 2.223164, mean_absolute_error: 0.064898, acc: 0.913260, mean_q: 0.863236 5799/100000: episode: 6, duration: 7.158s, episode steps: 875, steps per second: 122, episode reward: 440.000, mean reward: 0.503 [0.000, 200.000], mean action: 7.543 [0.000, 8.000], mean observation: 72.829 [0.000, 228.000], loss: 25.469583, mean_absolute_error: 0.096295, acc: 0.898169, mean_q: 0.897363 6678/100000: episode: 7, duration: 7.295s, episode steps: 879, steps per second: 120, episode reward: 840.000, mean reward: 0.956 
[0.000, 400.000], mean action: 7.575 [0.000, 8.000], mean observation: 72.777 [0.000, 228.000], loss: 116.441804, mean_absolute_error: 0.142503, acc: 0.902050, mean_q: 0.919481 7795/100000: episode: 8, duration: 8.628s, episode steps: 1117, steps per second: 129, episode reward: 250.000, mean reward: 0.224 [0.000, 50.000], mean action: 7.655 [0.000, 8.000], mean observation: 72.716 [0.000, 228.000], loss: 2.105452, mean_absolute_error: 0.055605, acc: 0.920251, mean_q: 0.946919 9231/100000: episode: 9, duration: 11.622s, episode steps: 1436, steps per second: 124, episode reward: 360.000, mean reward: 0.251 [0.000, 50.000], mean action: 7.570 [0.000, 8.000], mean observation: 72.549 [0.000, 228.000], loss: 2.009200, mean_absolute_error: 0.060428, acc: 0.905226, mean_q: 0.952722 10253/100000: episode: 10, duration: 8.108s, episode steps: 1022, steps per second: 126, episode reward: 240.000, mean reward: 0.235 [0.000, 50.000], mean action: 7.652 [0.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 2.237449, mean_absolute_error: 0.055788, acc: 0.915769, mean_q: 0.958375 11275/100000: episode: 11, duration: 8.014s, episode steps: 1022, steps per second: 128, episode reward: 490.000, mean reward: 0.479 [0.000, 200.000], mean action: 7.599 [0.000, 8.000], mean observation: 72.781 [0.000, 228.000], loss: 22.073673, mean_absolute_error: 0.085744, acc: 0.907933, mean_q: 0.971059 11984/100000: episode: 12, duration: 5.775s, episode steps: 709, steps per second: 123, episode reward: 250.000, mean reward: 0.353 [0.000, 50.000], mean action: 7.626 [0.000, 8.000], mean observation: 72.810 [0.000, 228.000], loss: 3.251150, mean_absolute_error: 0.069292, acc: 0.911017, mean_q: 0.977070 12855/100000: episode: 13, duration: 7.077s, episode steps: 871, steps per second: 123, episode reward: 440.000, mean reward: 0.505 [0.000, 200.000], mean action: 7.636 [0.000, 8.000], mean observation: 72.786 [0.000, 228.000], loss: 25.574995, mean_absolute_error: 0.084563, acc: 
0.914943, mean_q: 0.981379 13990/100000: episode: 14, duration: 9.386s, episode steps: 1135, steps per second: 121, episode reward: 1650.000, mean reward: 1.454 [0.000, 800.000], mean action: 7.607 [0.000, 8.000], mean observation: 72.837 [0.000, 228.000], loss: 371.897181, mean_absolute_error: 0.188477, acc: 0.919753, mean_q: 0.990182 15105/100000: episode: 15, duration: 14.405s, episode steps: 1115, steps per second: 77, episode reward: 850.000, mean reward: 0.762 [0.000, 400.000], mean action: 7.587 [0.000, 8.000], mean observation: 72.805 [0.000, 228.000], loss: 91.851788, mean_absolute_error: 0.114778, acc: 0.905745, mean_q: 0.992449 15901/100000: episode: 16, duration: 13.268s, episode steps: 796, steps per second: 60, episode reward: 520.000, mean reward: 0.653 [0.000, 200.000], mean action: 7.560 [0.000, 8.000], mean observation: 72.665 [0.000, 228.000], loss: 28.777944, mean_absolute_error: 0.102847, acc: 0.905660, mean_q: 0.993742 16892/100000: episode: 17, duration: 16.525s, episode steps: 991, steps per second: 60, episode reward: 530.000, mean reward: 0.535 [0.000, 250.000], mean action: 7.529 [0.000, 8.000], mean observation: 72.740 [0.000, 228.000], loss: 33.318019, mean_absolute_error: 0.092642, acc: 0.893939, mean_q: 0.994124 17419/100000: episode: 18, duration: 8.805s, episode steps: 527, steps per second: 60, episode reward: 120.000, mean reward: 0.228 [0.000, 10.000], mean action: 7.518 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 1.243997, mean_absolute_error: 0.056959, acc: 0.897338, mean_q: 0.994407 18320/100000: episode: 19, duration: 15.112s, episode steps: 901, steps per second: 60, episode reward: 440.000, mean reward: 0.488 [0.000, 200.000], mean action: 7.604 [0.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 24.761886, mean_absolute_error: 0.082639, acc: 0.911111, mean_q: 0.995430 19158/100000: episode: 20, duration: 6.628s, episode steps: 838, steps per second: 126, episode reward: 440.000, mean 
reward: 0.525 [0.000, 200.000], mean action: 7.530 [0.000, 8.000], mean observation: 72.818 [0.000, 228.000], loss: 26.591505, mean_absolute_error: 0.089191, acc: 0.902031, mean_q: 0.996318 20011/100000: episode: 21, duration: 6.920s, episode steps: 853, steps per second: 123, episode reward: 460.000, mean reward: 0.539 [0.000, 200.000], mean action: 7.614 [0.000, 8.000], mean observation: 72.760 [0.000, 228.000], loss: 26.203765, mean_absolute_error: 0.088394, acc: 0.910798, mean_q: 0.997042 20943/100000: episode: 22, duration: 7.347s, episode steps: 932, steps per second: 127, episode reward: 250.000, mean reward: 0.268 [0.000, 50.000], mean action: 7.655 [0.000, 8.000], mean observation: 72.785 [0.000, 228.000], loss: 2.483114, mean_absolute_error: 0.054482, acc: 0.921590, mean_q: 0.997791 21510/100000: episode: 23, duration: 4.377s, episode steps: 567, steps per second: 130, episode reward: 170.000, mean reward: 0.300 [0.000, 10.000], mean action: 7.566 [0.000, 8.000], mean observation: 72.784 [0.000, 228.000], loss: 1.587112, mean_absolute_error: 0.063345, acc: 0.908127, mean_q: 0.997974 22501/100000: episode: 24, duration: 7.652s, episode steps: 991, steps per second: 130, episode reward: 440.000, mean reward: 0.444 [0.000, 200.000], mean action: 7.605 [0.000, 8.000], mean observation: 72.749 [0.000, 228.000], loss: 22.501602, mean_absolute_error: 0.074465, acc: 0.918182, mean_q: 0.998464 23134/100000: episode: 25, duration: 4.901s, episode steps: 633, steps per second: 129, episode reward: 210.000, mean reward: 0.332 [0.000, 10.000], mean action: 7.651 [0.000, 8.000], mean observation: 72.756 [0.000, 228.000], loss: 1.724502, mean_absolute_error: 0.061043, acc: 0.924051, mean_q: 0.998674 24017/100000: episode: 26, duration: 6.855s, episode steps: 883, steps per second: 129, episode reward: 840.000, mean reward: 0.951 [0.000, 400.000], mean action: 7.701 [0.000, 8.000], mean observation: 72.812 [0.000, 228.000], loss: 115.891073, mean_absolute_error: 
0.126790, acc: 0.933107, mean_q: 0.999062 25116/100000: episode: 27, duration: 8.704s, episode steps: 1099, steps per second: 126, episode reward: 240.000, mean reward: 0.218 [0.000, 50.000], mean action: 7.641 [0.000, 8.000], mean observation: 72.795 [0.000, 228.000], loss: 2.091278, mean_absolute_error: 0.052662, acc: 0.912568, mean_q: 0.999349 25683/100000: episode: 28, duration: 4.560s, episode steps: 567, steps per second: 124, episode reward: 190.000, mean reward: 0.335 [0.000, 10.000], mean action: 7.630 [0.000, 8.000], mean observation: 72.795 [0.000, 228.000], loss: 1.711272, mean_absolute_error: 0.066766, acc: 0.904594, mean_q: 0.999385 26702/100000: episode: 29, duration: 7.994s, episode steps: 1019, steps per second: 127, episode reward: 450.000, mean reward: 0.442 [0.000, 210.000], mean action: 7.610 [0.000, 8.000], mean observation: 72.756 [0.000, 228.000], loss: 23.866865, mean_absolute_error: 0.076911, acc: 0.910609, mean_q: 0.999505 27526/100000: episode: 30, duration: 6.403s, episode steps: 824, steps per second: 129, episode reward: 520.000, mean reward: 0.631 [0.000, 200.000], mean action: 7.714 [0.000, 8.000], mean observation: 72.666 [0.000, 228.000], loss: 27.502925, mean_absolute_error: 0.091450, acc: 0.935601, mean_q: 0.999648 28375/100000: episode: 31, duration: 6.663s, episode steps: 849, steps per second: 127, episode reward: 440.000, mean reward: 0.518 [0.000, 200.000], mean action: 7.669 [0.000, 8.000], mean observation: 72.782 [0.000, 228.000], loss: 26.230695, mean_absolute_error: 0.081821, acc: 0.925708, mean_q: 0.999724 29178/100000: episode: 32, duration: 8.344s, episode steps: 803, steps per second: 96, episode reward: 290.000, mean reward: 0.361 [0.000, 50.000], mean action: 7.578 [0.000, 8.000], mean observation: 72.741 [0.000, 228.000], loss: 3.113726, mean_absolute_error: 0.070727, acc: 0.899002, mean_q: 0.999793 30010/100000: episode: 33, duration: 6.812s, episode steps: 832, steps per second: 122, episode reward: 840.000, 
mean reward: 1.010 [0.000, 400.000], mean action: 7.683 [0.000, 8.000], mean observation: 72.794 [0.000, 228.000], loss: 122.562868, mean_absolute_error: 0.135093, acc: 0.930205, mean_q: 0.999827 30987/100000: episode: 34, duration: 8.184s, episode steps: 977, steps per second: 119, episode reward: 840.000, mean reward: 0.860 [0.000, 400.000], mean action: 7.513 [0.000, 8.000], mean observation: 72.750 [0.000, 228.000], loss: 104.393781, mean_absolute_error: 0.128820, acc: 0.894467, mean_q: 0.999898 31840/100000: episode: 35, duration: 6.706s, episode steps: 853, steps per second: 127, episode reward: 500.000, mean reward: 0.586 [0.000, 200.000], mean action: 7.594 [0.000, 8.000], mean observation: 72.674 [0.000, 228.000], loss: 26.466181, mean_absolute_error: 0.092597, acc: 0.906103, mean_q: 0.999918 32373/100000: episode: 36, duration: 4.210s, episode steps: 533, steps per second: 127, episode reward: 140.000, mean reward: 0.263 [0.000, 10.000], mean action: 7.525 [0.000, 8.000], mean observation: 72.866 [0.000, 228.000], loss: 1.362516, mean_absolute_error: 0.058625, acc: 0.907895, mean_q: 0.999926 33181/100000: episode: 37, duration: 6.368s, episode steps: 808, steps per second: 127, episode reward: 450.000, mean reward: 0.557 [0.000, 200.000], mean action: 7.551 [0.000, 8.000], mean observation: 72.751 [0.000, 228.000], loss: 27.561425, mean_absolute_error: 0.093955, acc: 0.897150, mean_q: 0.999936 33935/100000: episode: 38, duration: 5.940s, episode steps: 754, steps per second: 127, episode reward: 240.000, mean reward: 0.318 [0.000, 50.000], mean action: 7.614 [0.000, 8.000], mean observation: 72.769 [0.000, 228.000], loss: 3.023288, mean_absolute_error: 0.063794, acc: 0.908367, mean_q: 0.999946 34732/100000: episode: 39, duration: 6.283s, episode steps: 797, steps per second: 127, episode reward: 240.000, mean reward: 0.301 [0.000, 50.000], mean action: 7.621 [0.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 2.833605, mean_absolute_error: 
0.063442, acc: 0.905779, mean_q: 0.999952 35523/100000: episode: 40, duration: 6.237s, episode steps: 791, steps per second: 127, episode reward: 250.000, mean reward: 0.316 [0.000, 50.000], mean action: 7.716 [0.000, 8.000], mean observation: 72.799 [0.000, 228.000], loss: 2.894485, mean_absolute_error: 0.056825, acc: 0.934177, mean_q: 0.999958 36105/100000: episode: 41, duration: 4.556s, episode steps: 582, steps per second: 128, episode reward: 150.000, mean reward: 0.258 [0.000, 10.000], mean action: 7.586 [0.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 1.353258, mean_absolute_error: 0.057389, acc: 0.905336, mean_q: 0.999964 36952/100000: episode: 42, duration: 6.637s, episode steps: 847, steps per second: 128, episode reward: 440.000, mean reward: 0.519 [0.000, 50.000], mean action: 7.530 [0.000, 8.000], mean observation: 72.594 [0.000, 228.000], loss: 3.869566, mean_absolute_error: 0.090140, acc: 0.892435, mean_q: 0.999973 37835/100000: episode: 43, duration: 6.949s, episode steps: 883, steps per second: 127, episode reward: 240.000, mean reward: 0.272 [0.000, 50.000], mean action: 7.652 [0.000, 8.000], mean observation: 72.839 [0.000, 228.000], loss: 2.552832, mean_absolute_error: 0.056240, acc: 0.916100, mean_q: 0.999982 38553/100000: episode: 44, duration: 5.641s, episode steps: 718, steps per second: 127, episode reward: 320.000, mean reward: 0.446 [0.000, 50.000], mean action: 7.653 [0.000, 8.000], mean observation: 72.705 [0.000, 228.000], loss: 3.684632, mean_absolute_error: 0.076542, acc: 0.914923, mean_q: 0.999985 39438/100000: episode: 45, duration: 6.912s, episode steps: 885, steps per second: 128, episode reward: 450.000, mean reward: 0.508 [0.000, 200.000], mean action: 7.605 [0.000, 8.000], mean observation: 72.841 [0.000, 228.000], loss: 25.235985, mean_absolute_error: 0.084998, acc: 0.911765, mean_q: 0.999990 40406/100000: episode: 46, duration: 8.010s, episode steps: 968, steps per second: 121, episode reward: 500.000, mean 
reward: 0.517 [0.000, 200.000], mean action: 7.512 [0.000, 8.000], mean observation: 72.763 [0.000, 228.000], loss: 23.345646, mean_absolute_error: 0.090591, acc: 0.890383, mean_q: 0.999993 41195/100000: episode: 47, duration: 6.431s, episode steps: 789, steps per second: 123, episode reward: 840.000, mean reward: 1.065 [0.000, 400.000], mean action: 7.549 [0.000, 8.000], mean observation: 72.821 [0.000, 228.000], loss: 129.524892, mean_absolute_error: 0.149651, acc: 0.901015, mean_q: 0.999995 42077/100000: episode: 48, duration: 7.383s, episode steps: 882, steps per second: 119, episode reward: 240.000, mean reward: 0.272 [0.000, 50.000], mean action: 7.596 [0.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 2.581690, mean_absolute_error: 0.060674, acc: 0.906924, mean_q: 0.999996 42820/100000: episode: 49, duration: 6.010s, episode steps: 743, steps per second: 124, episode reward: 320.000, mean reward: 0.431 [0.000, 50.000], mean action: 7.661 [0.000, 8.000], mean observation: 72.792 [0.000, 228.000], loss: 3.586110, mean_absolute_error: 0.073265, acc: 0.921833, mean_q: 0.999997 43785/100000: episode: 50, duration: 8.156s, episode steps: 965, steps per second: 118, episode reward: 840.000, mean reward: 0.870 [0.000, 400.000], mean action: 7.511 [0.000, 8.000], mean observation: 72.791 [0.000, 228.000], loss: 106.296034, mean_absolute_error: 0.128120, acc: 0.900415, mean_q: 0.999997 44492/100000: episode: 51, duration: 5.798s, episode steps: 707, steps per second: 122, episode reward: 340.000, mean reward: 0.481 [0.000, 50.000], mean action: 7.644 [0.000, 8.000], mean observation: 72.764 [0.000, 228.000], loss: 3.898969, mean_absolute_error: 0.081017, acc: 0.915014, mean_q: 0.999998 45008/100000: episode: 52, duration: 4.049s, episode steps: 516, steps per second: 127, episode reward: 110.000, mean reward: 0.213 [0.000, 10.000], mean action: 7.616 [0.000, 8.000], mean observation: 72.874 [0.000, 228.000], loss: 1.149696, mean_absolute_error: 0.052781, 
acc: 0.908738, mean_q: 0.999998 45864/100000: episode: 53, duration: 6.788s, episode steps: 856, steps per second: 126, episode reward: 450.000, mean reward: 0.526 [0.000, 200.000], mean action: 7.654 [0.000, 8.000], mean observation: 72.777 [0.000, 228.000], loss: 26.111743, mean_absolute_error: 0.086688, acc: 0.912281, mean_q: 0.999998 46447/100000: episode: 54, duration: 4.697s, episode steps: 583, steps per second: 124, episode reward: 250.000, mean reward: 0.429 [0.000, 10.000], mean action: 7.628 [0.000, 8.000], mean observation: 72.879 [0.000, 228.000], loss: 2.198980, mean_absolute_error: 0.072940, acc: 0.919244, mean_q: 0.999999 46962/100000: episode: 55, duration: 3.957s, episode steps: 515, steps per second: 130, episode reward: 110.000, mean reward: 0.214 [0.000, 10.000], mean action: 7.555 [0.000, 8.000], mean observation: 72.866 [0.000, 228.000], loss: 1.142107, mean_absolute_error: 0.055873, acc: 0.900778, mean_q: 0.999999 47846/100000: episode: 56, duration: 7.034s, episode steps: 884, steps per second: 126, episode reward: 240.000, mean reward: 0.271 [0.000, 50.000], mean action: 7.538 [0.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 2.629218, mean_absolute_error: 0.063533, acc: 0.892412, mean_q: 0.999999 48636/100000: episode: 57, duration: 6.133s, episode steps: 790, steps per second: 129, episode reward: 920.000, mean reward: 1.165 [0.000, 400.000], mean action: 7.600 [0.000, 8.000], mean observation: 72.732 [0.000, 228.000], loss: 130.072215, mean_absolute_error: 0.158051, acc: 0.910013, mean_q: 0.999999 49211/100000: episode: 58, duration: 4.527s, episode steps: 575, steps per second: 127, episode reward: 230.000, mean reward: 0.400 [0.000, 10.000], mean action: 7.487 [0.000, 8.000], mean observation: 72.775 [0.000, 228.000], loss: 2.043686, mean_absolute_error: 0.076785, acc: 0.893728, mean_q: 1.000000 50064/100000: episode: 59, duration: 6.762s, episode steps: 853, steps per second: 126, episode reward: 440.000, mean reward: 
0.516 [0.000, 200.000], mean action: 7.653 [0.000, 8.000], mean observation: 72.808 [0.000, 228.000], loss: 26.108275, mean_absolute_error: 0.085424, acc: 0.909624, mean_q: 1.000000 50907/100000: episode: 60, duration: 6.492s, episode steps: 843, steps per second: 130, episode reward: 280.000, mean reward: 0.332 [0.000, 50.000], mean action: 7.619 [0.000, 8.000], mean observation: 72.713 [0.000, 228.000], loss: 2.913751, mean_absolute_error: 0.061126, acc: 0.918052, mean_q: 1.000000 52066/100000: episode: 61, duration: 9.202s, episode steps: 1159, steps per second: 126, episode reward: 240.000, mean reward: 0.207 [0.000, 50.000], mean action: 7.677 [0.000, 8.000], mean observation: 72.739 [0.000, 228.000], loss: 1.964234, mean_absolute_error: 0.048993, acc: 0.918826, mean_q: 1.000000 52669/100000: episode: 62, duration: 4.886s, episode steps: 603, steps per second: 123, episode reward: 250.000, mean reward: 0.415 [0.000, 10.000], mean action: 7.587 [0.000, 8.000], mean observation: 72.773 [0.000, 228.000], loss: 2.122637, mean_absolute_error: 0.074025, acc: 0.911960, mean_q: 1.000000 53498/100000: episode: 63, duration: 6.810s, episode steps: 829, steps per second: 122, episode reward: 540.000, mean reward: 0.651 [0.000, 200.000], mean action: 7.575 [0.000, 8.000], mean observation: 72.644 [0.000, 228.000], loss: 27.518876, mean_absolute_error: 0.102597, acc: 0.902174, mean_q: 1.000000 54438/100000: episode: 64, duration: 7.521s, episode steps: 940, steps per second: 125, episode reward: 240.000, mean reward: 0.255 [0.000, 50.000], mean action: 7.545 [0.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 2.387436, mean_absolute_error: 0.059689, acc: 0.902023, mean_q: 1.000000 55452/100000: episode: 65, duration: 7.977s, episode steps: 1014, steps per second: 127, episode reward: 520.000, mean reward: 0.513 [0.000, 50.000], mean action: 7.628 [0.000, 8.000], mean observation: 72.525 [0.000, 228.000], loss: 3.588693, mean_absolute_error: 0.083786, acc: 
0.912142, mean_q: 1.000000 56369/100000: episode: 66, duration: 7.691s, episode steps: 917, steps per second: 119, episode reward: 840.000, mean reward: 0.916 [0.000, 400.000], mean action: 7.550 [0.000, 8.000], mean observation: 72.793 [0.000, 228.000], loss: 111.591030, mean_absolute_error: 0.129850, acc: 0.909389, mean_q: 1.000000 57240/100000: episode: 67, duration: 7.270s, episode steps: 871, steps per second: 120, episode reward: 470.000, mean reward: 0.540 [0.000, 200.000], mean action: 7.629 [0.000, 8.000], mean observation: 72.772 [0.000, 228.000], loss: 25.765324, mean_absolute_error: 0.084851, acc: 0.920690, mean_q: 1.000000 58194/100000: episode: 68, duration: 7.642s, episode steps: 954, steps per second: 125, episode reward: 520.000, mean reward: 0.545 [0.000, 200.000], mean action: 7.621 [0.000, 8.000], mean observation: 72.745 [0.000, 228.000], loss: 23.569458, mean_absolute_error: 0.087988, acc: 0.913956, mean_q: 1.000000 59060/100000: episode: 69, duration: 6.935s, episode steps: 866, steps per second: 125, episode reward: 840.000, mean reward: 0.970 [0.000, 400.000], mean action: 7.565 [0.000, 8.000], mean observation: 72.795 [0.000, 228.000], loss: 118.224175, mean_absolute_error: 0.137821, acc: 0.905202, mean_q: 1.000000 59834/100000: episode: 70, duration: 5.999s, episode steps: 774, steps per second: 129, episode reward: 520.000, mean reward: 0.672 [0.000, 200.000], mean action: 7.612 [0.000, 8.000], mean observation: 72.746 [0.000, 228.000], loss: 29.306530, mean_absolute_error: 0.101251, acc: 0.915912, mean_q: 1.000000 60924/100000: episode: 71, duration: 11.196s, episode steps: 1090, steps per second: 97, episode reward: 240.000, mean reward: 0.220 [0.000, 50.000], mean action: 7.513 [0.000, 8.000], mean observation: 72.826 [0.000, 228.000], loss: 2.112230, mean_absolute_error: 0.055560, acc: 0.898072, mean_q: 1.000000 61952/100000: episode: 72, duration: 8.290s, episode steps: 1028, steps per second: 124, episode reward: 920.000, mean 
reward: 0.895 [0.000, 400.000], mean action: 7.568 [0.000, 8.000], mean observation: 72.769 [0.000, 228.000], loss: 99.638470, mean_absolute_error: 0.130471, acc: 0.903603, mean_q: 1.000000 62842/100000: episode: 73, duration: 6.951s, episode steps: 890, steps per second: 128, episode reward: 840.000, mean reward: 0.944 [0.000, 400.000], mean action: 7.609 [0.000, 8.000], mean observation: 72.804 [0.000, 228.000], loss: 115.731040, mean_absolute_error: 0.134264, acc: 0.908886, mean_q: 1.000000 63356/100000: episode: 74, duration: 4.006s, episode steps: 514, steps per second: 128, episode reward: 120.000, mean reward: 0.233 [0.000, 10.000], mean action: 7.541 [0.000, 8.000], mean observation: 72.866 [0.000, 228.000], loss: 1.247589, mean_absolute_error: 0.054930, acc: 0.904483, mean_q: 1.000000 63985/100000: episode: 75, duration: 4.914s, episode steps: 629, steps per second: 128, episode reward: 110.000, mean reward: 0.175 [0.000, 10.000], mean action: 7.650 [0.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.968055, mean_absolute_error: 0.046987, acc: 0.915605, mean_q: 1.000000 64795/100000: episode: 76, duration: 6.385s, episode steps: 810, steps per second: 127, episode reward: 440.000, mean reward: 0.543 [0.000, 200.000], mean action: 7.648 [0.000, 8.000], mean observation: 72.752 [0.000, 228.000], loss: 27.501371, mean_absolute_error: 0.084056, acc: 0.925834, mean_q: 1.000000 65724/100000: episode: 77, duration: 7.229s, episode steps: 929, steps per second: 129, episode reward: 450.000, mean reward: 0.484 [0.000, 200.000], mean action: 7.636 [0.000, 8.000], mean observation: 72.823 [0.000, 228.000], loss: 23.860660, mean_absolute_error: 0.078057, acc: 0.921336, mean_q: 1.000000 66513/100000: episode: 78, duration: 10.277s, episode steps: 789, steps per second: 77, episode reward: 240.000, mean reward: 0.304 [0.000, 50.000], mean action: 7.662 [0.000, 8.000], mean observation: 72.827 [0.000, 228.000], loss: 2.873815, mean_absolute_error: 
0.059327, acc: 0.922589, mean_q: 1.000000 67519/100000: episode: 79, duration: 7.801s, episode steps: 1006, steps per second: 129, episode reward: 850.000, mean reward: 0.845 [0.000, 400.000], mean action: 7.636 [0.000, 8.000], mean observation: 72.835 [0.000, 228.000], loss: 101.992474, mean_absolute_error: 0.119336, acc: 0.918408, mean_q: 1.000000 68396/100000: episode: 80, duration: 6.807s, episode steps: 877, steps per second: 129, episode reward: 450.000, mean reward: 0.513 [0.000, 200.000], mean action: 7.596 [0.000, 8.000], mean observation: 72.784 [0.000, 228.000], loss: 25.428141, mean_absolute_error: 0.084259, acc: 0.913242, mean_q: 1.000000 69183/100000: episode: 81, duration: 6.107s, episode steps: 787, steps per second: 129, episode reward: 840.000, mean reward: 1.067 [0.000, 400.000], mean action: 7.682 [0.000, 8.000], mean observation: 72.850 [0.000, 228.000], loss: 130.164832, mean_absolute_error: 0.144570, acc: 0.922392, mean_q: 1.000000 69968/100000: episode: 82, duration: 6.078s, episode steps: 785, steps per second: 129, episode reward: 450.000, mean reward: 0.573 [0.000, 200.000], mean action: 7.618 [0.000, 8.000], mean observation: 72.775 [0.000, 228.000], loss: 28.480391, mean_absolute_error: 0.087428, acc: 0.922194, mean_q: 1.000000 70897/100000: episode: 83, duration: 7.275s, episode steps: 929, steps per second: 128, episode reward: 840.000, mean reward: 0.904 [0.000, 400.000], mean action: 7.635 [0.000, 8.000], mean observation: 72.838 [0.000, 228.000], loss: 110.639460, mean_absolute_error: 0.127834, acc: 0.917026, mean_q: 1.000000 71913/100000: episode: 84, duration: 8.216s, episode steps: 1016, steps per second: 124, episode reward: 250.000, mean reward: 0.246 [0.000, 50.000], mean action: 7.594 [0.000, 8.000], mean observation: 72.821 [0.000, 228.000], loss: 2.298804, mean_absolute_error: 0.057479, acc: 0.903448, mean_q: 1.000000 72995/100000: episode: 85, duration: 8.460s, episode steps: 1082, steps per second: 128, episode reward: 
480.000, mean reward: 0.444 [0.000, 50.000], mean action: 7.513 [0.000, 8.000], mean observation: 72.474 [0.000, 228.000], loss: 3.197630, mean_absolute_error: 0.081302, acc: 0.896392, mean_q: 1.000000 73886/100000: episode: 86, duration: 6.948s, episode steps: 891, steps per second: 128, episode reward: 250.000, mean reward: 0.281 [0.000, 50.000], mean action: 7.590 [0.000, 8.000], mean observation: 72.815 [0.000, 228.000], loss: 2.540349, mean_absolute_error: 0.059831, acc: 0.908989, mean_q: 1.000000 74673/100000: episode: 87, duration: 6.144s, episode steps: 787, steps per second: 128, episode reward: 440.000, mean reward: 0.559 [0.000, 200.000], mean action: 7.582 [0.000, 8.000], mean observation: 72.830 [0.000, 228.000], loss: 28.320099, mean_absolute_error: 0.090964, acc: 0.908397, mean_q: 1.000000 75725/100000: episode: 88, duration: 8.263s, episode steps: 1052, steps per second: 127, episode reward: 440.000, mean reward: 0.418 [0.000, 210.000], mean action: 7.647 [0.000, 8.000], mean observation: 72.797 [0.000, 228.000], loss: 22.874603, mean_absolute_error: 0.073542, acc: 0.915319, mean_q: 1.000000 77082/100000: episode: 89, duration: 10.597s, episode steps: 1357, steps per second: 128, episode reward: 360.000, mean reward: 0.265 [0.000, 50.000], mean action: 7.580 [0.000, 8.000], mean observation: 72.650 [0.000, 228.000], loss: 2.131358, mean_absolute_error: 0.061060, acc: 0.899705, mean_q: 1.000000 77781/100000: episode: 90, duration: 5.501s, episode steps: 699, steps per second: 127, episode reward: 320.000, mean reward: 0.458 [0.000, 50.000], mean action: 7.597 [0.000, 8.000], mean observation: 72.710 [0.000, 228.000], loss: 3.825074, mean_absolute_error: 0.081441, acc: 0.906877, mean_q: 1.000000 78863/100000: episode: 91, duration: 8.454s, episode steps: 1082, steps per second: 128, episode reward: 350.000, mean reward: 0.323 [0.000, 50.000], mean action: 7.560 [0.000, 8.000], mean observation: 72.591 [0.000, 228.000], loss: 2.660822, 
mean_absolute_error: 0.065323, acc: 0.905643, mean_q: 1.000000 79758/100000: episode: 92, duration: 6.975s, episode steps: 895, steps per second: 128, episode reward: 450.000, mean reward: 0.503 [0.000, 200.000], mean action: 7.515 [0.000, 8.000], mean observation: 72.772 [0.000, 228.000], loss: 25.028459, mean_absolute_error: 0.087384, acc: 0.898210, mean_q: 1.000000 80623/100000: episode: 93, duration: 6.753s, episode steps: 865, steps per second: 128, episode reward: 330.000, mean reward: 0.382 [0.000, 50.000], mean action: 7.595 [0.000, 8.000], mean observation: 72.696 [0.000, 228.000], loss: 3.137615, mean_absolute_error: 0.072383, acc: 0.901620, mean_q: 1.000000 81256/100000: episode: 94, duration: 4.939s, episode steps: 633, steps per second: 128, episode reward: 180.000, mean reward: 0.284 [0.000, 10.000], mean action: 7.555 [0.000, 8.000], mean observation: 72.880 [0.000, 228.000], loss: 1.506944, mean_absolute_error: 0.061245, acc: 0.905063, mean_q: 1.000000 82119/100000: episode: 95, duration: 6.707s, episode steps: 863, steps per second: 129, episode reward: 440.000, mean reward: 0.510 [0.000, 200.000], mean action: 7.622 [0.000, 8.000], mean observation: 72.793 [0.000, 228.000], loss: 25.798052, mean_absolute_error: 0.084959, acc: 0.910673, mean_q: 1.000000 83096/100000: episode: 96, duration: 7.616s, episode steps: 977, steps per second: 128, episode reward: 850.000, mean reward: 0.870 [0.000, 400.000], mean action: 7.672 [0.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 104.618345, mean_absolute_error: 0.120369, acc: 0.922131, mean_q: 1.000000 83961/100000: episode: 97, duration: 6.750s, episode steps: 865, steps per second: 128, episode reward: 330.000, mean reward: 0.382 [0.000, 50.000], mean action: 7.570 [0.000, 8.000], mean observation: 72.631 [0.000, 228.000], loss: 3.114582, mean_absolute_error: 0.072126, acc: 0.899306, mean_q: 1.000000 84566/100000: episode: 98, duration: 4.728s, episode steps: 605, steps per second: 128, 
episode reward: 140.000, mean reward: 0.231 [0.000, 10.000], mean action: 7.633 [0.000, 8.000], mean observation: 72.877 [0.000, 228.000], loss: 1.210112, mean_absolute_error: 0.050493, acc: 0.918874, mean_q: 1.000000 85462/100000: episode: 99, duration: 7.022s, episode steps: 896, steps per second: 128, episode reward: 490.000, mean reward: 0.547 [0.000, 200.000], mean action: 7.643 [0.000, 8.000], mean observation: 72.731 [0.000, 228.000], loss: 25.314672, mean_absolute_error: 0.088062, acc: 0.913966, mean_q: 1.000000 86243/100000: episode: 100, duration: 6.134s, episode steps: 781, steps per second: 127, episode reward: 450.000, mean reward: 0.576 [0.000, 200.000], mean action: 7.603 [0.000, 8.000], mean observation: 72.782 [0.000, 228.000], loss: 28.307328, mean_absolute_error: 0.091382, acc: 0.911538, mean_q: 1.000000 87223/100000: episode: 101, duration: 7.687s, episode steps: 980, steps per second: 127, episode reward: 450.000, mean reward: 0.459 [0.000, 200.000], mean action: 7.545 [0.000, 8.000], mean observation: 72.808 [0.000, 228.000], loss: 22.798168, mean_absolute_error: 0.081012, acc: 0.905005, mean_q: 1.000000 88124/100000: episode: 102, duration: 7.013s, episode steps: 901, steps per second: 128, episode reward: 450.000, mean reward: 0.499 [0.000, 200.000], mean action: 7.511 [0.000, 8.000], mean observation: 72.826 [0.000, 228.000], loss: 24.825180, mean_absolute_error: 0.090104, acc: 0.890000, mean_q: 1.000000 88941/100000: episode: 103, duration: 6.389s, episode steps: 817, steps per second: 128, episode reward: 330.000, mean reward: 0.404 [0.000, 50.000], mean action: 7.553 [0.000, 8.000], mean observation: 72.699 [0.000, 228.000], loss: 3.368771, mean_absolute_error: 0.075227, acc: 0.900735, mean_q: 1.000000 89994/100000: episode: 104, duration: 8.242s, episode steps: 1053, steps per second: 128, episode reward: 840.000, mean reward: 0.798 [0.000, 400.000], mean action: 7.640 [0.000, 8.000], mean observation: 72.799 [0.000, 228.000], loss: 
97.199268, mean_absolute_error: 0.116136, acc: 0.910646, mean_q: 1.000000 90850/100000: episode: 105, duration: 6.707s, episode steps: 856, steps per second: 128, episode reward: 280.000, mean reward: 0.327 [0.000, 50.000], mean action: 7.605 [0.000, 8.000], mean observation: 72.757 [0.000, 228.000], loss: 2.850689, mean_absolute_error: 0.064840, acc: 0.908772, mean_q: 1.000000 91796/100000: episode: 106, duration: 7.387s, episode steps: 946, steps per second: 128, episode reward: 500.000, mean reward: 0.529 [0.000, 200.000], mean action: 7.489 [0.000, 8.000], mean observation: 72.745 [0.000, 228.000], loss: 24.115241, mean_absolute_error: 0.090460, acc: 0.895238, mean_q: 1.000000 92631/100000: episode: 107, duration: 6.531s, episode steps: 835, steps per second: 128, episode reward: 850.000, mean reward: 1.018 [0.000, 400.000], mean action: 7.544 [0.000, 8.000], mean observation: 72.760 [0.000, 228.000], loss: 122.685083, mean_absolute_error: 0.146609, acc: 0.890887, mean_q: 1.000000 93520/100000: episode: 108, duration: 6.946s, episode steps: 889, steps per second: 128, episode reward: 440.000, mean reward: 0.495 [0.000, 200.000], mean action: 7.594 [0.000, 8.000], mean observation: 72.825 [0.000, 228.000], loss: 25.282309, mean_absolute_error: 0.084610, acc: 0.903153, mean_q: 1.000000 94361/100000: episode: 109, duration: 6.602s, episode steps: 841, steps per second: 127, episode reward: 330.000, mean reward: 0.392 [0.000, 50.000], mean action: 7.633 [0.000, 8.000], mean observation: 72.638 [0.000, 228.000], loss: 3.195201, mean_absolute_error: 0.067343, acc: 0.922619, mean_q: 1.000000 94903/100000: episode: 110, duration: 4.239s, episode steps: 542, steps per second: 128, episode reward: 200.000, mean reward: 0.369 [0.000, 10.000], mean action: 7.683 [0.000, 8.000], mean observation: 72.835 [0.000, 228.000], loss: 1.897125, mean_absolute_error: 0.065492, acc: 0.920518, mean_q: 1.000000 95480/100000: episode: 111, duration: 4.534s, episode steps: 577, steps per 
second: 127, episode reward: 150.000, mean reward: 0.260 [0.000, 10.000], mean action: 7.624 [0.000, 8.000], mean observation: 72.871 [0.000, 228.000], loss: 1.386677, mean_absolute_error: 0.058260, acc: 0.909722, mean_q: 1.000000 96189/100000: episode: 112, duration: 5.534s, episode steps: 709, steps per second: 128, episode reward: 450.000, mean reward: 0.635 [0.000, 200.000], mean action: 7.573 [0.000, 8.000], mean observation: 72.778 [0.000, 228.000], loss: 31.451715, mean_absolute_error: 0.099940, acc: 0.906780, mean_q: 1.000000 96900/100000: episode: 113, duration: 5.571s, episode steps: 711, steps per second: 128, episode reward: 450.000, mean reward: 0.633 [0.000, 200.000], mean action: 7.615 [0.000, 8.000], mean observation: 72.797 [0.000, 228.000], loss: 31.400907, mean_absolute_error: 0.099200, acc: 0.908451, mean_q: 1.000000 97995/100000: episode: 114, duration: 8.594s, episode steps: 1095, steps per second: 127, episode reward: 240.000, mean reward: 0.219 [0.000, 50.000], mean action: 7.572 [0.000, 8.000], mean observation: 72.780 [0.000, 228.000], loss: 2.085018, mean_absolute_error: 0.054207, acc: 0.905850, mean_q: 1.000000 98786/100000: episode: 115, duration: 6.189s, episode steps: 791, steps per second: 128, episode reward: 930.000, mean reward: 1.176 [0.000, 400.000], mean action: 7.575 [0.000, 8.000], mean observation: 72.678 [0.000, 228.000], loss: 129.505934, mean_absolute_error: 0.158702, acc: 0.915190, mean_q: 1.000000 99578/100000: episode: 116, duration: 6.201s, episode steps: 792, steps per second: 128, episode reward: 240.000, mean reward: 0.303 [0.000, 50.000], mean action: 7.539 [0.000, 8.000], mean observation: 72.811 [0.000, 228.000], loss: 2.884318, mean_absolute_error: 0.063420, acc: 0.901391, mean_q: 1.000000 done, took 890.633 seconds Testing for 10 episodes ... 
Episode 1: reward: 70.000, steps: 631 Episode 2: reward: 70.000, steps: 640 Episode 3: reward: 70.000, steps: 624 Episode 4: reward: 70.000, steps: 616 Episode 5: reward: 70.000, steps: 624 Episode 6: reward: 70.000, steps: 633 Episode 7: reward: 70.000, steps: 629 Episode 8: reward: 70.000, steps: 621 Episode 9: reward: 70.000, steps: 640 Episode 10: reward: 70.000, steps: 615
<keras.callbacks.History at 0x123c84ba8>
# Build, train, and evaluate a SARSA agent on MsPacman using the shared model.
# Bug fixes: `SARSAAgent` was never imported (only DQNAgent/DDPGAgent are at the
# top of the file) and `policy` was never defined — both raised NameError.
from rl.agents.sarsa import SARSAAgent

# Epsilon-greedy exploration policy; the class is already imported at file top.
policy = EpsGreedyQPolicy()

sarsa = SARSAAgent(model, nb_actions,
                   policy=policy, test_policy=None,  # None -> library's default greedy test policy
                   gamma=0.99, nb_steps_warmup=10,   # discount factor; warm-up steps before training
                   train_interval=1)                 # update the network every step
# Compile with Adam (lr=1e-3); track MAE and accuracy alongside the loss.
sarsa.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])
# Train for 100k environment steps, then evaluate over 10 episodes.
# NOTE(review): visualize=True renders every frame and slows training ~considerably;
# set False for headless runs.
sarsa.fit(env, nb_steps=100000, visualize=True, verbose=2)
sarsa.test(env, nb_episodes=10, visualize=True)
Training for 100000 steps ... 789/100000: episode: 1, duration: 8.591s, episode steps: 789, steps per second: 92, episode reward: 270.000, mean reward: 0.342 [0.000, 50.000], mean action: 7.024 [0.000, 8.000], mean observation: 72.821 [0.000, 228.000], loss: 3.280811, mean_absolute_error: 0.095194, acc: 0.786632, mean_q: 1.000000 1790/100000: episode: 2, duration: 7.855s, episode steps: 1001, steps per second: 127, episode reward: 570.000, mean reward: 0.569 [0.000, 200.000], mean action: 7.797 [0.000, 8.000], mean observation: 72.541 [0.000, 228.000], loss: 22.827030, mean_absolute_error: 0.077349, acc: 0.954000, mean_q: 1.000000 2623/100000: episode: 3, duration: 6.634s, episode steps: 833, steps per second: 126, episode reward: 840.000, mean reward: 1.008 [0.000, 400.000], mean action: 7.819 [0.000, 8.000], mean observation: 72.809 [0.000, 228.000], loss: 122.849101, mean_absolute_error: 0.124602, acc: 0.962740, mean_q: 1.000000 3374/100000: episode: 4, duration: 5.933s, episode steps: 751, steps per second: 127, episode reward: 110.000, mean reward: 0.146 [0.000, 10.000], mean action: 7.879 [0.000, 8.000], mean observation: 72.877 [0.000, 228.000], loss: 0.744462, mean_absolute_error: 0.025588, acc: 0.973333, mean_q: 1.000000 4005/100000: episode: 5, duration: 4.980s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.938 [0.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.585303, mean_absolute_error: 0.018325, acc: 0.984127, mean_q: 1.000000 4851/100000: episode: 6, duration: 6.910s, episode steps: 846, steps per second: 122, episode reward: 440.000, mean reward: 0.520 [0.000, 200.000], mean action: 7.862 [0.000, 8.000], mean observation: 72.819 [0.000, 228.000], loss: 26.284027, mean_absolute_error: 0.068155, acc: 0.970414, mean_q: 1.000000 5748/100000: episode: 7, duration: 7.205s, episode steps: 897, steps per second: 124, episode reward: 440.000, mean reward: 0.491 [0.000, 
200.000], mean action: 7.928 [0.000, 8.000], mean observation: 72.817 [0.000, 228.000], loss: 24.786848, mean_absolute_error: 0.060533, acc: 0.984375, mean_q: 1.000000 6703/100000: episode: 8, duration: 7.544s, episode steps: 955, steps per second: 127, episode reward: 240.000, mean reward: 0.251 [0.000, 50.000], mean action: 7.938 [0.000, 8.000], mean observation: 72.719 [0.000, 228.000], loss: 2.308288, mean_absolute_error: 0.033744, acc: 0.984277, mean_q: 1.000000 7332/100000: episode: 9, duration: 4.978s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.976 [2.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.561785, mean_absolute_error: 0.015223, acc: 0.993631, mean_q: 1.000000 8208/100000: episode: 10, duration: 6.942s, episode steps: 876, steps per second: 126, episode reward: 440.000, mean reward: 0.502 [0.000, 200.000], mean action: 7.962 [0.000, 8.000], mean observation: 72.792 [0.000, 228.000], loss: 25.376075, mean_absolute_error: 0.060074, acc: 0.989714, mean_q: 1.000000 8783/100000: episode: 11, duration: 4.834s, episode steps: 575, steps per second: 119, episode reward: 140.000, mean reward: 0.243 [0.000, 10.000], mean action: 7.932 [0.000, 8.000], mean observation: 72.878 [0.000, 228.000], loss: 1.250938, mean_absolute_error: 0.033527, acc: 0.982578, mean_q: 1.000000 9401/100000: episode: 12, duration: 5.140s, episode steps: 618, steps per second: 120, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [5.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.571800, mean_absolute_error: 0.015475, acc: 0.993517, mean_q: 1.000000 10323/100000: episode: 13, duration: 7.536s, episode steps: 922, steps per second: 122, episode reward: 840.000, mean reward: 0.911 [0.000, 400.000], mean action: 7.959 [1.000, 8.000], mean observation: 72.823 [0.000, 228.000], loss: 110.953515, mean_absolute_error: 0.104428, acc: 0.992400, mean_q: 1.000000 
10948/100000: episode: 14, duration: 4.942s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.971 [0.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.565386, mean_absolute_error: 0.015313, acc: 0.993590, mean_q: 1.000000 11577/100000: episode: 15, duration: 4.973s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.973 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563362, mean_absolute_error: 0.015748, acc: 0.992038, mean_q: 1.000000 12204/100000: episode: 16, duration: 4.928s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.981 [2.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.563580, mean_absolute_error: 0.015268, acc: 0.993610, mean_q: 1.000000 13106/100000: episode: 17, duration: 7.141s, episode steps: 902, steps per second: 126, episode reward: 440.000, mean reward: 0.488 [0.000, 210.000], mean action: 7.966 [1.000, 8.000], mean observation: 72.829 [0.000, 228.000], loss: 26.851471, mean_absolute_error: 0.057764, acc: 0.991121, mean_q: 1.000000 13841/100000: episode: 18, duration: 5.842s, episode steps: 735, steps per second: 126, episode reward: 150.000, mean reward: 0.204 [0.000, 10.000], mean action: 7.958 [1.000, 8.000], mean observation: 72.865 [0.000, 228.000], loss: 1.032624, mean_absolute_error: 0.028118, acc: 0.985014, mean_q: 1.000000 14482/100000: episode: 19, duration: 5.068s, episode steps: 641, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.978 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554346, mean_absolute_error: 0.015990, acc: 0.990625, mean_q: 1.000000 15109/100000: episode: 20, duration: 4.967s, episode steps: 627, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 
7.976 [3.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.565161, mean_absolute_error: 0.015795, acc: 0.992013, mean_q: 1.000000 15720/100000: episode: 21, duration: 4.823s, episode steps: 611, steps per second: 127, episode reward: 80.000, mean reward: 0.131 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.656919, mean_absolute_error: 0.016375, acc: 0.996721, mean_q: 1.000000 16345/100000: episode: 22, duration: 4.929s, episode steps: 625, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.986 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563800, mean_absolute_error: 0.014785, acc: 0.995192, mean_q: 1.000000 16951/100000: episode: 23, duration: 4.810s, episode steps: 606, steps per second: 126, episode reward: 90.000, mean reward: 0.149 [0.000, 10.000], mean action: 7.962 [1.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.751372, mean_absolute_error: 0.020516, acc: 0.990083, mean_q: 1.000000 17576/100000: episode: 24, duration: 4.960s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.957 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.570146, mean_absolute_error: 0.016900, acc: 0.988782, mean_q: 1.000000 18637/100000: episode: 25, duration: 8.378s, episode steps: 1061, steps per second: 127, episode reward: 840.000, mean reward: 0.792 [0.000, 400.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.842 [0.000, 228.000], loss: 96.410495, mean_absolute_error: 0.090153, acc: 0.996226, mean_q: 1.000000 19465/100000: episode: 26, duration: 6.697s, episode steps: 828, steps per second: 124, episode reward: 240.000, mean reward: 0.290 [0.000, 50.000], mean action: 7.949 [0.000, 8.000], mean observation: 72.794 [0.000, 228.000], loss: 2.667546, mean_absolute_error: 0.036628, acc: 0.989117, mean_q: 1.000000 20634/100000: 
episode: 27, duration: 9.208s, episode steps: 1169, steps per second: 127, episode reward: 840.000, mean reward: 0.719 [0.000, 400.000], mean action: 7.964 [1.000, 8.000], mean observation: 72.786 [0.000, 228.000], loss: 87.500916, mean_absolute_error: 0.083615, acc: 0.991438, mean_q: 1.000000 21581/100000: episode: 28, duration: 7.471s, episode steps: 947, steps per second: 127, episode reward: 440.000, mean reward: 0.465 [0.000, 200.000], mean action: 7.957 [0.000, 8.000], mean observation: 72.800 [0.000, 228.000], loss: 23.473624, mean_absolute_error: 0.056346, acc: 0.988372, mean_q: 1.000000 22426/100000: episode: 29, duration: 6.663s, episode steps: 845, steps per second: 127, episode reward: 840.000, mean reward: 0.994 [0.000, 400.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.795 [0.000, 228.000], loss: 121.085431, mean_absolute_error: 0.113332, acc: 0.994076, mean_q: 1.000000 23047/100000: episode: 30, duration: 4.883s, episode steps: 621, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 23664/100000: episode: 31, duration: 4.853s, episode steps: 617, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000 24948/100000: episode: 32, duration: 10.138s, episode steps: 1284, steps per second: 127, episode reward: 520.000, mean reward: 0.405 [0.000, 200.000], mean action: 7.983 [1.000, 8.000], mean observation: 72.676 [0.000, 228.000], loss: 17.623777, mean_absolute_error: 0.047725, acc: 0.994544, mean_q: 1.000000 25576/100000: episode: 33, duration: 4.944s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.971 
[0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562681, mean_absolute_error: 0.015245, acc: 0.993620, mean_q: 1.000000 26208/100000: episode: 34, duration: 4.981s, episode steps: 632, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.983 [4.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.560683, mean_absolute_error: 0.015679, acc: 0.992076, mean_q: 1.000000 26819/100000: episode: 35, duration: 4.820s, episode steps: 611, steps per second: 127, episode reward: 100.000, mean reward: 0.164 [0.000, 10.000], mean action: 7.971 [0.000, 8.000], mean observation: 72.898 [0.000, 228.000], loss: 0.825394, mean_absolute_error: 0.021634, acc: 0.991803, mean_q: 1.000000 27757/100000: episode: 36, duration: 7.389s, episode steps: 938, steps per second: 127, episode reward: 440.000, mean reward: 0.469 [0.000, 200.000], mean action: 7.981 [2.000, 8.000], mean observation: 72.783 [0.000, 228.000], loss: 23.692752, mean_absolute_error: 0.054763, acc: 0.994664, mean_q: 1.000000 28599/100000: episode: 37, duration: 6.656s, episode steps: 842, steps per second: 127, episode reward: 440.000, mean reward: 0.523 [0.000, 200.000], mean action: 7.973 [1.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 26.398447, mean_absolute_error: 0.061280, acc: 0.992866, mean_q: 1.000000 29238/100000: episode: 38, duration: 5.097s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551429, mean_absolute_error: 0.014485, acc: 0.995298, mean_q: 1.000000 30236/100000: episode: 39, duration: 7.870s, episode steps: 998, steps per second: 127, episode reward: 840.000, mean reward: 0.842 [0.000, 400.000], mean action: 7.967 [0.000, 8.000], mean observation: 72.828 [0.000, 228.000], loss: 102.505608, mean_absolute_error: 0.096773, acc: 0.992979, mean_q: 1.000000 30870/100000: 
episode: 40, duration: 5.020s, episode steps: 634, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555784, mean_absolute_error: 0.014590, acc: 0.995261, mean_q: 1.000000 31498/100000: episode: 41, duration: 4.984s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 32133/100000: episode: 42, duration: 5.002s, episode steps: 635, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554908, mean_absolute_error: 0.014569, acc: 0.995268, mean_q: 1.000000 32764/100000: episode: 43, duration: 4.970s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000 33384/100000: episode: 44, duration: 4.898s, episode steps: 620, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.984 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569953, mean_absolute_error: 0.015428, acc: 0.993538, mean_q: 1.000000 34004/100000: episode: 45, duration: 4.894s, episode steps: 620, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.568353, mean_absolute_error: 0.014895, acc: 0.995153, mean_q: 1.000000 34638/100000: episode: 46, duration: 4.976s, episode steps: 634, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.970 [0.000, 8.000], 
mean observation: 72.890 [0.000, 228.000], loss: 0.557348, mean_absolute_error: 0.015111, acc: 0.993681, mean_q: 1.000000 35257/100000: episode: 47, duration: 4.872s, episode steps: 619, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000 35885/100000: episode: 48, duration: 4.941s, episode steps: 628, steps per second: 127, episode reward: 110.000, mean reward: 0.175 [0.000, 10.000], mean action: 7.959 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.884180, mean_absolute_error: 0.023372, acc: 0.990431, mean_q: 1.000000 36510/100000: episode: 49, duration: 4.929s, episode steps: 625, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 37137/100000: episode: 50, duration: 4.964s, episode steps: 627, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 37755/100000: episode: 51, duration: 4.867s, episode steps: 618, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.994 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.570196, mean_absolute_error: 0.014940, acc: 0.995138, mean_q: 1.000000 38625/100000: episode: 52, duration: 6.905s, episode steps: 870, steps per second: 126, episode reward: 840.000, mean reward: 0.966 [0.000, 400.000], mean action: 7.970 [0.000, 8.000], mean observation: 72.784 [0.000, 228.000], loss: 117.603101, mean_absolute_error: 0.110483, acc: 0.993096, mean_q: 1.000000 39260/100000: episode: 53, duration: 
5.013s, episode steps: 635, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554908, mean_absolute_error: 0.014569, acc: 0.995268, mean_q: 1.000000 39899/100000: episode: 54, duration: 5.069s, episode steps: 639, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 40534/100000: episode: 55, duration: 5.144s, episode steps: 635, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000 41166/100000: episode: 56, duration: 5.067s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 41784/100000: episode: 57, duration: 4.906s, episode steps: 618, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.584799, mean_absolute_error: 0.014408, acc: 0.996759, mean_q: 1.000000 42415/100000: episode: 58, duration: 5.117s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.975 [3.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.563145, mean_absolute_error: 0.016226, acc: 0.990476, mean_q: 1.000000 43037/100000: episode: 59, duration: 4.936s, episode steps: 622, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 
72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 43655/100000: episode: 60, duration: 4.930s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 44268/100000: episode: 61, duration: 4.983s, episode steps: 613, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000 44893/100000: episode: 62, duration: 4.937s, episode steps: 625, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563800, mean_absolute_error: 0.014785, acc: 0.995192, mean_q: 1.000000 45653/100000: episode: 63, duration: 6.066s, episode steps: 760, steps per second: 125, episode reward: 240.000, mean reward: 0.316 [0.000, 50.000], mean action: 7.962 [0.000, 8.000], mean observation: 72.820 [0.000, 228.000], loss: 2.901315, mean_absolute_error: 0.038071, acc: 0.993412, mean_q: 1.000000 46295/100000: episode: 64, duration: 5.093s, episode steps: 642, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000 46931/100000: episode: 65, duration: 5.019s, episode steps: 636, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 47553/100000: episode: 66, duration: 4.897s, episode steps: 622, 
steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.986 [2.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.566523, mean_absolute_error: 0.014851, acc: 0.995169, mean_q: 1.000000 48180/100000: episode: 67, duration: 4.945s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 48804/100000: episode: 68, duration: 4.913s, episode steps: 624, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.564705, mean_absolute_error: 0.014806, acc: 0.995185, mean_q: 1.000000 49433/100000: episode: 69, duration: 4.955s, episode steps: 629, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [4.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.560209, mean_absolute_error: 0.014697, acc: 0.995223, mean_q: 1.000000 50063/100000: episode: 70, duration: 5.026s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 50704/100000: episode: 71, duration: 5.087s, episode steps: 641, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 51449/100000: episode: 72, duration: 5.895s, episode steps: 745, steps per second: 126, episode reward: 240.000, mean reward: 0.322 [0.000, 50.000], mean action: 7.952 [0.000, 8.000], mean observation: 72.818 [0.000, 228.000], loss: 
2.965130, mean_absolute_error: 0.040590, acc: 0.987903, mean_q: 1.000000 52075/100000: episode: 73, duration: 4.940s, episode steps: 626, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 52703/100000: episode: 74, duration: 4.941s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.975 [1.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.562681, mean_absolute_error: 0.015245, acc: 0.993620, mean_q: 1.000000 53332/100000: episode: 75, duration: 4.983s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 53969/100000: episode: 76, duration: 5.072s, episode steps: 637, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000 54597/100000: episode: 77, duration: 5.116s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 55228/100000: episode: 78, duration: 5.108s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.971 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560002, mean_absolute_error: 0.015178, acc: 0.993651, mean_q: 1.000000 55859/100000: episode: 79, duration: 5.018s, episode steps: 631, steps per second: 126, episode 
reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.541145, mean_absolute_error: 0.013781, acc: 0.996825, mean_q: 1.000000 56487/100000: episode: 80, duration: 5.084s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.981 [1.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014368, acc: 0.996810, mean_q: 1.000000 57121/100000: episode: 81, duration: 5.067s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.984 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555784, mean_absolute_error: 0.014590, acc: 0.995261, mean_q: 1.000000 57801/100000: episode: 82, duration: 5.387s, episode steps: 680, steps per second: 126, episode reward: 100.000, mean reward: 0.147 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.906 [0.000, 228.000], loss: 0.737148, mean_absolute_error: 0.018090, acc: 0.997054, mean_q: 1.000000 58429/100000: episode: 83, duration: 4.997s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 59050/100000: episode: 84, duration: 4.890s, episode steps: 621, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 59677/100000: episode: 85, duration: 4.933s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [5.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 
0.014741, acc: 0.995208, mean_q: 1.000000 60315/100000: episode: 86, duration: 5.016s, episode steps: 638, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 60924/100000: episode: 87, duration: 4.881s, episode steps: 609, steps per second: 125, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 7.989 [4.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.578635, mean_absolute_error: 0.015144, acc: 0.995066, mean_q: 1.000000 61549/100000: episode: 88, duration: 5.010s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 62177/100000: episode: 89, duration: 4.986s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561102, mean_absolute_error: 0.014719, acc: 0.995215, mean_q: 1.000000 63110/100000: episode: 90, duration: 7.407s, episode steps: 933, steps per second: 126, episode reward: 440.000, mean reward: 0.472 [0.000, 200.000], mean action: 7.983 [1.000, 8.000], mean observation: 72.788 [0.000, 228.000], loss: 23.818796, mean_absolute_error: 0.054697, acc: 0.995708, mean_q: 1.000000 63732/100000: episode: 91, duration: 4.910s, episode steps: 622, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 64354/100000: episode: 92, duration: 5.029s, episode steps: 622, steps per second: 124, episode reward: 80.000, mean reward: 
0.129 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.645283, mean_absolute_error: 0.016105, acc: 0.996779, mean_q: 1.000000 64972/100000: episode: 93, duration: 5.117s, episode steps: 618, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568591, mean_absolute_error: 0.014405, acc: 0.996759, mean_q: 1.000000 65567/100000: episode: 94, duration: 4.784s, episode steps: 595, steps per second: 124, episode reward: 90.000, mean reward: 0.151 [0.000, 10.000], mean action: 7.976 [1.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.743619, mean_absolute_error: 0.018838, acc: 0.994949, mean_q: 1.000000 66193/100000: episode: 95, duration: 5.162s, episode steps: 626, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 66981/100000: episode: 96, duration: 6.200s, episode steps: 788, steps per second: 127, episode reward: 240.000, mean reward: 0.305 [0.000, 50.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 2.795577, mean_absolute_error: 0.035917, acc: 0.996188, mean_q: 1.000000 67628/100000: episode: 97, duration: 5.357s, episode steps: 647, steps per second: 121, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.543068, mean_absolute_error: 0.013808, acc: 0.996904, mean_q: 1.000000 68259/100000: episode: 98, duration: 4.961s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.984 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, 
mean_q: 1.000000 68885/100000: episode: 99, duration: 4.916s, episode steps: 626, steps per second: 127, episode reward: 80.000, mean reward: 0.128 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.897 [0.000, 228.000], loss: 0.644322, mean_absolute_error: 0.017065, acc: 0.993600, mean_q: 1.000000 69511/100000: episode: 100, duration: 4.927s, episode steps: 626, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 70147/100000: episode: 101, duration: 5.125s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 70766/100000: episode: 102, duration: 5.038s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000 71392/100000: episode: 103, duration: 5.094s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.979 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000 72022/100000: episode: 104, duration: 5.135s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 72636/100000: episode: 105, duration: 4.888s, episode steps: 614, steps per second: 126, episode reward: 70.000, mean reward: 0.114 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 73349/100000: episode: 106, duration: 5.632s, episode steps: 713, steps per second: 127, episode reward: 90.000, mean reward: 0.126 [0.000, 10.000], mean action: 7.990 [3.000, 8.000], mean observation: 72.905 [0.000, 228.000], loss: 0.634291, mean_absolute_error: 0.016209, acc: 0.995787, mean_q: 1.000000 73982/100000: episode: 107, duration: 5.060s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556664, mean_absolute_error: 0.014611, acc: 0.995253, mean_q: 1.000000 74604/100000: episode: 108, duration: 4.916s, episode steps: 622, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 75216/100000: episode: 109, duration: 4.820s, episode steps: 612, steps per second: 127, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.574174, mean_absolute_error: 0.014535, acc: 0.996727, mean_q: 1.000000 75844/100000: episode: 110, duration: 4.956s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 76463/100000: episode: 111, duration: 4.880s, episode steps: 619, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.997 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.569273, mean_absolute_error: 0.014917, acc: 0.995146, mean_q: 
1.000000 77085/100000: episode: 112, duration: 4.921s, episode steps: 622, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 77720/100000: episode: 113, duration: 5.000s, episode steps: 635, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 78350/100000: episode: 114, duration: 4.982s, episode steps: 630, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559318, mean_absolute_error: 0.014676, acc: 0.995231, mean_q: 1.000000 79292/100000: episode: 115, duration: 7.416s, episode steps: 942, steps per second: 127, episode reward: 840.000, mean reward: 0.892 [0.000, 400.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.790 [0.000, 228.000], loss: 108.600573, mean_absolute_error: 0.100712, acc: 0.997875, mean_q: 1.000000 79912/100000: episode: 116, duration: 4.890s, episode steps: 620, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000 80542/100000: episode: 117, duration: 4.974s, episode steps: 630, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 81149/100000: episode: 118, duration: 4.792s, episode steps: 607, steps per second: 127, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], 
mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.578911, mean_absolute_error: 0.014646, acc: 0.996700, mean_q: 1.000000 82026/100000: episode: 119, duration: 6.922s, episode steps: 877, steps per second: 127, episode reward: 440.000, mean reward: 0.502 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.796 [0.000, 228.000], loss: 25.339196, mean_absolute_error: 0.057369, acc: 0.997717, mean_q: 1.000000 82659/100000: episode: 120, duration: 4.986s, episode steps: 633, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 83288/100000: episode: 121, duration: 4.949s, episode steps: 629, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 83917/100000: episode: 122, duration: 4.984s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 84546/100000: episode: 123, duration: 4.971s, episode steps: 629, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 85170/100000: episode: 124, duration: 4.939s, episode steps: 624, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000 
85796/100000: episode: 125, duration: 4.954s, episode steps: 626, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 86416/100000: episode: 126, duration: 4.905s, episode steps: 620, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 87038/100000: episode: 127, duration: 4.916s, episode steps: 622, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 87661/100000: episode: 128, duration: 4.946s, episode steps: 623, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000 88289/100000: episode: 129, duration: 4.945s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.561102, mean_absolute_error: 0.014719, acc: 0.995215, mean_q: 1.000000 88920/100000: episode: 130, duration: 4.979s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 89547/100000: episode: 131, duration: 4.935s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 
7.995 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 90683/100000: episode: 132, duration: 8.939s, episode steps: 1136, steps per second: 127, episode reward: 440.000, mean reward: 0.387 [0.000, 200.000], mean action: 7.995 [4.000, 8.000], mean observation: 72.763 [0.000, 228.000], loss: 19.557831, mean_absolute_error: 0.044822, acc: 0.997357, mean_q: 1.000000 91299/100000: episode: 133, duration: 4.879s, episode steps: 616, steps per second: 126, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 91913/100000: episode: 134, duration: 4.856s, episode steps: 614, steps per second: 126, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 92540/100000: episode: 135, duration: 4.927s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 93168/100000: episode: 136, duration: 4.948s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 93796/100000: episode: 137, duration: 4.945s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 94433/100000: 
episode: 138, duration: 5.023s, episode steps: 637, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.536040, mean_absolute_error: 0.013662, acc: 0.996855, mean_q: 1.000000 95050/100000: episode: 139, duration: 5.681s, episode steps: 617, steps per second: 109, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000 95686/100000: episode: 140, duration: 5.984s, episode steps: 636, steps per second: 106, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000 96327/100000: episode: 141, duration: 5.046s, episode steps: 641, steps per second: 127, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 96953/100000: episode: 142, duration: 4.919s, episode steps: 626, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 97576/100000: episode: 143, duration: 4.899s, episode steps: 623, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 98207/100000: episode: 144, duration: 4.985s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 
8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 98841/100000: episode: 145, duration: 4.987s, episode steps: 634, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 99484/100000: episode: 146, duration: 5.079s, episode steps: 643, steps per second: 127, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000 done, took 829.107 seconds Testing for 10 episodes ... Episode 1: reward: 70.000, steps: 621 Episode 2: reward: 70.000, steps: 629 Episode 3: reward: 70.000, steps: 618 Episode 4: reward: 70.000, steps: 628 Episode 5: reward: 70.000, steps: 629 Episode 6: reward: 70.000, steps: 633 Episode 7: reward: 70.000, steps: 614 Episode 8: reward: 70.000, steps: 620 Episode 9: reward: 70.000, steps: 641 Episode 10: reward: 70.000, steps: 627
<keras.callbacks.History at 0x137bf7e48>
## Plot loss and accuracy for the first 100 episodes: the loss is decreasing and the accuracy is growing
## The reward stays roughly the same while the loss function gets smaller
# Per-episode training loss for the SARSA agent with a Boltzmann policy,
# transcribed by hand from the training log above (one value per episode).
loss_Sarsa_Bolzman = [3.28,22.82,122.84,0.744,0.585,26.28,24.78,2.308,0.561,25.376,
1.2509,0.571,110.953,0.565,0.563,0.563,26.851,1.032,0.554,0.565,
0.656,0.563,0.751,0.570,96.41,2.667,87.50,23.47,121.085,0.565,0.569,
17.62,0.562,0.560,0.825,23.69,26.398,0.551,102.505,0.555,0.559,0.554,
0.558,0.569,0.568,0.557,0.567,0.884,0.560,0.560,0.570,117.603,0.554,
0.548,0.553,0.554,0.584,0.563,0.563,0.566,0.571,0.563,2.901,0.545,0.550,
0.566,0.558,0.564,0.560,0.557,0.546,2.965,0.559,0.562,0.562,0.557,0.551,
0.557,0.560,0.541,0.559,0.555,0.737,0.559,0.564,0.561,0.549,0.578,0.560,
0.561,23.818,0.564,0.645,0.568,0.743,0.561, 2.795,0.543,0.558,0.644,0.559,
0.550,0.567,0.562,0.556,0.570,0.634,0.556,0.563,0.574,0.557,0.569,108.60,
0.566,0.556,0.578,25.339,0.555,0.558,0.557,0.558,0.563,0.559,0.565,0.564,
0.564,0.561,0.555,0.560,19.557,0.568,0.570,0.560,0.559,0.559,0.536,0.569,
0.552,0.546,0.561,0.562,0.556,0.552,0.544]
# Derive the x-axis from the data length instead of the former hard-coded
# range(0, 144): plt.plot raises ValueError if the transcribed list and the
# episode count ever drift apart.
episodes = list(range(len(loss_Sarsa_Bolzman)))
plt.plot(episodes, loss_Sarsa_Bolzman, 'r--')
# Zoom in on loss values below 10 so the rare reward-spike episodes
# (loss > 20) do not flatten the rest of the curve.
plt.axis([0, 110, 0, 10])
plt.xlabel('Episode')
plt.ylabel('Training loss')
plt.show()
# Fix: SARSAAgent was never imported — the file header only imports DQNAgent
# and DDPGAgent — so this cell raised NameError. The keras-rl package (`rl`)
# is already a dependency of this file.
from rl.agents.sarsa import SARSAAgent

# NOTE(review): `policy` must be defined in an earlier cell (presumably one of
# the imported policies, e.g. BoltzmannGumbelQPolicy or EpsGreedyQPolicy) —
# confirm before running. test_policy=None falls back to the agent's default
# greedy test policy.
sarsa = SARSAAgent(model, nb_actions,
policy=policy, test_policy=None,
gamma=0.99, nb_steps_warmup=10,  # discount factor; warm up 10 steps before training
train_interval=1)                # update the network after every environment step
sarsa.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])
# Train for 1M steps with per-episode logging (verbose=2); visualize=True
# renders every frame and slows training considerably.
sarsa.fit(env, nb_steps=1000000, visualize=True, verbose=2)
# Evaluate the trained greedy policy over 10 episodes.
sarsa.test(env, nb_episodes=10, visualize=True)
Training for 1000000 steps ... 777/1000000: episode: 1, duration: 8.665s, episode steps: 777, steps per second: 90, episode reward: 230.000, mean reward: 0.296 [0.000, 50.000], mean action: 7.069 [0.000, 8.000], mean observation: 72.865 [0.000, 228.000], loss: 2.938891, mean_absolute_error: 0.084981, acc: 0.802872, mean_q: 1.000000 1743/1000000: episode: 2, duration: 7.822s, episode steps: 966, steps per second: 123, episode reward: 450.000, mean reward: 0.466 [0.000, 200.000], mean action: 7.844 [0.000, 8.000], mean observation: 72.801 [0.000, 228.000], loss: 23.077522, mean_absolute_error: 0.064501, acc: 0.962694, mean_q: 1.000000 2818/1000000: episode: 3, duration: 8.631s, episode steps: 1075, steps per second: 125, episode reward: 240.000, mean reward: 0.223 [0.000, 50.000], mean action: 7.829 [0.000, 8.000], mean observation: 72.808 [0.000, 228.000], loss: 2.101268, mean_absolute_error: 0.038194, acc: 0.960894, mean_q: 1.000000 3425/1000000: episode: 4, duration: 4.910s, episode steps: 607, steps per second: 124, episode reward: 160.000, mean reward: 0.264 [0.000, 10.000], mean action: 7.908 [0.000, 8.000], mean observation: 72.879 [0.000, 228.000], loss: 1.326534, mean_absolute_error: 0.038376, acc: 0.973597, mean_q: 1.000000 4049/1000000: episode: 5, duration: 4.979s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.875 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.585362, mean_absolute_error: 0.021693, acc: 0.974318, mean_q: 1.000000 4670/1000000: episode: 6, duration: 4.949s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.900 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.585000, mean_absolute_error: 0.020728, acc: 0.977419, mean_q: 1.000000 5643/1000000: episode: 7, duration: 7.760s, episode steps: 973, steps per second: 125, episode reward: 390.000, mean reward: 0.401 [0.000, 
50.000], mean action: 7.906 [0.000, 8.000], mean observation: 72.534 [0.000, 228.000], loss: 3.030100, mean_absolute_error: 0.051387, acc: 0.979424, mean_q: 1.000000 6426/1000000: episode: 8, duration: 6.233s, episode steps: 783, steps per second: 126, episode reward: 440.000, mean reward: 0.562 [0.000, 200.000], mean action: 7.943 [0.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 28.383811, mean_absolute_error: 0.067648, acc: 0.985934, mean_q: 1.000000 7049/1000000: episode: 9, duration: 4.952s, episode steps: 623, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.942 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.573570, mean_absolute_error: 0.017481, acc: 0.987138, mean_q: 1.000000 7673/1000000: episode: 10, duration: 4.990s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.947 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.571061, mean_absolute_error: 0.017102, acc: 0.987159, mean_q: 1.000000 8449/1000000: episode: 11, duration: 6.216s, episode steps: 776, steps per second: 125, episode reward: 440.000, mean reward: 0.567 [0.000, 200.000], mean action: 7.936 [0.000, 8.000], mean observation: 72.827 [0.000, 228.000], loss: 28.642736, mean_absolute_error: 0.069101, acc: 0.983226, mean_q: 1.000000 9073/1000000: episode: 12, duration: 4.979s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.566294, mean_absolute_error: 0.015336, acc: 0.993579, mean_q: 1.000000 9698/1000000: episode: 13, duration: 4.994s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.947 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.571732, mean_absolute_error: 0.017429, acc: 0.987179, mean_q: 1.000000 
10329/1000000: episode: 14, duration: 5.021s, episode steps: 631, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.959 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.579018, mean_absolute_error: 0.016229, acc: 0.990476, mean_q: 1.000000 10947/1000000: episode: 15, duration: 4.925s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.972 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.571800, mean_absolute_error: 0.015475, acc: 0.993517, mean_q: 1.000000 11571/1000000: episode: 16, duration: 4.979s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.962 [0.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566294, mean_absolute_error: 0.015513, acc: 0.991974, mean_q: 1.000000 12505/1000000: episode: 17, duration: 7.435s, episode steps: 934, steps per second: 126, episode reward: 840.000, mean reward: 0.899 [0.000, 400.000], mean action: 7.918 [0.000, 8.000], mean observation: 72.831 [0.000, 228.000], loss: 109.560523, mean_absolute_error: 0.107581, acc: 0.979636, mean_q: 1.000000 13126/1000000: episode: 18, duration: 4.954s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.976 [2.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.570630, mean_absolute_error: 0.015937, acc: 0.991935, mean_q: 1.000000 13756/1000000: episode: 19, duration: 5.016s, episode steps: 630, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.940 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565614, mean_absolute_error: 0.016949, acc: 0.987281, mean_q: 1.000000 14385/1000000: episode: 20, duration: 5.035s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean 
action: 7.970 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564938, mean_absolute_error: 0.016449, acc: 0.988854, mean_q: 1.000000 15101/1000000: episode: 21, duration: 5.736s, episode steps: 716, steps per second: 125, episode reward: 100.000, mean reward: 0.140 [0.000, 10.000], mean action: 7.971 [1.000, 8.000], mean observation: 72.934 [0.000, 228.000], loss: 0.704189, mean_absolute_error: 0.018620, acc: 0.993007, mean_q: 1.000000 15732/1000000: episode: 22, duration: 5.047s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 16360/1000000: episode: 23, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 110.000, mean reward: 0.175 [0.000, 10.000], mean action: 7.965 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.868391, mean_absolute_error: 0.023197, acc: 0.988836, mean_q: 1.000000 16985/1000000: episode: 24, duration: 5.036s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.965 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.568559, mean_absolute_error: 0.016371, acc: 0.990385, mean_q: 1.000000 17612/1000000: episode: 25, duration: 5.049s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.984 [1.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.579554, mean_absolute_error: 0.015272, acc: 0.993610, mean_q: 1.000000 18234/1000000: episode: 26, duration: 4.994s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.950 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.571306, mean_absolute_error: 0.016445, acc: 0.990338, mean_q: 1.000000 
18849/1000000: episode: 27, duration: 4.954s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.987 [2.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.574594, mean_absolute_error: 0.015545, acc: 0.993485, mean_q: 1.000000 19476/1000000: episode: 28, duration: 5.024s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000 20181/1000000: episode: 29, duration: 5.674s, episode steps: 705, steps per second: 124, episode reward: 100.000, mean reward: 0.142 [0.000, 10.000], mean action: 7.974 [1.000, 8.000], mean observation: 72.934 [0.000, 228.000], loss: 0.713785, mean_absolute_error: 0.018425, acc: 0.994318, mean_q: 1.000000 20809/1000000: episode: 30, duration: 5.042s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 21442/1000000: episode: 31, duration: 5.079s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 22065/1000000: episode: 32, duration: 4.966s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.981 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.568796, mean_absolute_error: 0.015890, acc: 0.991961, mean_q: 1.000000 22688/1000000: episode: 33, duration: 5.005s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 
7.979 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.567204, mean_absolute_error: 0.015359, acc: 0.993569, mean_q: 1.000000 23320/1000000: episode: 34, duration: 5.074s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.983 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557546, mean_absolute_error: 0.014633, acc: 0.995246, mean_q: 1.000000 23938/1000000: episode: 35, duration: 4.988s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.985 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.573405, mean_absolute_error: 0.016009, acc: 0.991896, mean_q: 1.000000 24625/1000000: episode: 36, duration: 5.498s, episode steps: 687, steps per second: 125, episode reward: 100.000, mean reward: 0.146 [0.000, 10.000], mean action: 7.972 [0.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.735399, mean_absolute_error: 0.019841, acc: 0.991254, mean_q: 1.000000 25250/1000000: episode: 37, duration: 5.021s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 25876/1000000: episode: 38, duration: 5.105s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566066, mean_absolute_error: 0.015819, acc: 0.992000, mean_q: 1.000000 26516/1000000: episode: 39, duration: 5.446s, episode steps: 640, steps per second: 118, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.986 [0.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.550566, mean_absolute_error: 0.014464, acc: 0.995305, mean_q: 1.000000 27143/1000000: 
episode: 40, duration: 5.047s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.979 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565161, mean_absolute_error: 0.015795, acc: 0.992013, mean_q: 1.000000 27873/1000000: episode: 41, duration: 5.925s, episode steps: 730, steps per second: 123, episode reward: 240.000, mean reward: 0.329 [0.000, 50.000], mean action: 7.974 [1.000, 8.000], mean observation: 72.821 [0.000, 228.000], loss: 3.035784, mean_absolute_error: 0.040047, acc: 0.991770, mean_q: 1.000000 28494/1000000: episode: 42, duration: 5.243s, episode steps: 621, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 29434/1000000: episode: 43, duration: 7.656s, episode steps: 940, steps per second: 123, episode reward: 440.000, mean reward: 0.468 [0.000, 200.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.784 [0.000, 228.000], loss: 23.642288, mean_absolute_error: 0.054649, acc: 0.994675, mean_q: 1.000000 30070/1000000: episode: 44, duration: 5.128s, episode steps: 636, steps per second: 124, episode reward: 110.000, mean reward: 0.173 [0.000, 10.000], mean action: 7.970 [0.000, 8.000], mean observation: 72.876 [0.000, 228.000], loss: 0.869924, mean_absolute_error: 0.022052, acc: 0.993701, mean_q: 1.000000 31159/1000000: episode: 45, duration: 8.829s, episode steps: 1089, steps per second: 123, episode reward: 440.000, mean reward: 0.404 [0.000, 200.000], mean action: 7.963 [0.000, 8.000], mean observation: 72.747 [0.000, 228.000], loss: 20.407248, mean_absolute_error: 0.048227, acc: 0.992647, mean_q: 1.000000 31982/1000000: episode: 46, duration: 6.783s, episode steps: 823, steps per second: 121, episode reward: 440.000, mean reward: 0.535 [0.000, 200.000], mean action: 7.981 
[3.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 27.008629, mean_absolute_error: 0.062671, acc: 0.992701, mean_q: 1.000000 32608/1000000: episode: 47, duration: 5.253s, episode steps: 626, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000 33224/1000000: episode: 48, duration: 5.169s, episode steps: 616, steps per second: 119, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.982 [1.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555952, mean_absolute_error: 0.014627, acc: 0.995122, mean_q: 1.000000 33852/1000000: episode: 49, duration: 5.272s, episode steps: 628, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561102, mean_absolute_error: 0.014719, acc: 0.995215, mean_q: 1.000000 34483/1000000: episode: 50, duration: 5.450s, episode steps: 631, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.983 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000 35101/1000000: episode: 51, duration: 4.996s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.570196, mean_absolute_error: 0.014940, acc: 0.995138, mean_q: 1.000000 35738/1000000: episode: 52, duration: 5.098s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [4.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.553163, mean_absolute_error: 0.014527, acc: 0.995283, mean_q: 1.000000 36359/1000000: episode: 
53, duration: 4.970s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 36988/1000000: episode: 54, duration: 5.049s, episode steps: 629, steps per second: 125, episode reward: 80.000, mean reward: 0.127 [0.000, 10.000], mean action: 7.983 [1.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.641244, mean_absolute_error: 0.016989, acc: 0.993631, mean_q: 1.000000 37608/1000000: episode: 55, duration: 4.946s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.979 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.568353, mean_absolute_error: 0.014895, acc: 0.995153, mean_q: 1.000000 38223/1000000: episode: 56, duration: 4.909s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.985 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.572981, mean_absolute_error: 0.015007, acc: 0.995114, mean_q: 1.000000 38838/1000000: episode: 57, duration: 4.982s, episode steps: 615, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.571369, mean_absolute_error: 0.014470, acc: 0.996743, mean_q: 1.000000 39677/1000000: episode: 58, duration: 6.716s, episode steps: 839, steps per second: 125, episode reward: 840.000, mean reward: 1.001 [0.000, 400.000], mean action: 7.982 [0.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 121.950028, mean_absolute_error: 0.113348, acc: 0.996420, mean_q: 1.000000 40303/1000000: episode: 59, duration: 5.016s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], 
mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 41165/1000000: episode: 60, duration: 6.909s, episode steps: 862, steps per second: 125, episode reward: 840.000, mean reward: 0.974 [0.000, 400.000], mean action: 7.980 [1.000, 8.000], mean observation: 72.786 [0.000, 228.000], loss: 118.694663, mean_absolute_error: 0.111116, acc: 0.994193, mean_q: 1.000000 41798/1000000: episode: 61, duration: 5.082s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.556664, mean_absolute_error: 0.014611, acc: 0.995253, mean_q: 1.000000 42431/1000000: episode: 62, duration: 5.072s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.558230, mean_absolute_error: 0.015134, acc: 0.993671, mean_q: 1.000000 43062/1000000: episode: 63, duration: 5.058s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 43687/1000000: episode: 64, duration: 4.995s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 44313/1000000: episode: 65, duration: 5.025s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.982 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000 44933/1000000: episode: 66, duration: 
4.991s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 45552/1000000: episode: 67, duration: 4.975s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000 46786/1000000: episode: 68, duration: 9.825s, episode steps: 1234, steps per second: 126, episode reward: 440.000, mean reward: 0.357 [0.000, 200.000], mean action: 7.996 [5.000, 8.000], mean observation: 72.733 [0.000, 228.000], loss: 18.003360, mean_absolute_error: 0.041348, acc: 0.997567, mean_q: 1.000000 47406/1000000: episode: 69, duration: 5.091s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 48342/1000000: episode: 70, duration: 7.861s, episode steps: 936, steps per second: 119, episode reward: 840.000, mean reward: 0.897 [0.000, 400.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.826 [0.000, 228.000], loss: 109.297474, mean_absolute_error: 0.101351, acc: 0.997861, mean_q: 1.000000 48969/1000000: episode: 71, duration: 5.102s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000 49584/1000000: episode: 72, duration: 5.140s, episode steps: 615, steps per second: 120, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.992 [5.000, 8.000], mean 
observation: 72.892 [0.000, 228.000], loss: 0.572981, mean_absolute_error: 0.015007, acc: 0.995114, mean_q: 1.000000 50513/1000000: episode: 73, duration: 7.933s, episode steps: 929, steps per second: 117, episode reward: 240.000, mean reward: 0.258 [0.000, 50.000], mean action: 7.982 [1.000, 8.000], mean observation: 72.764 [0.000, 228.000], loss: 2.371892, mean_absolute_error: 0.030984, acc: 0.995690, mean_q: 1.000000 51129/1000000: episode: 74, duration: 4.978s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.982 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.573659, mean_absolute_error: 0.015521, acc: 0.993496, mean_q: 1.000000 51760/1000000: episode: 75, duration: 5.522s, episode steps: 631, steps per second: 114, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 52391/1000000: episode: 76, duration: 5.124s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 53006/1000000: episode: 77, duration: 4.966s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 53634/1000000: episode: 78, duration: 5.038s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 54268/1000000: episode: 79, duration: 5.111s, 
episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555784, mean_absolute_error: 0.014590, acc: 0.995261, mean_q: 1.000000 54892/1000000: episode: 80, duration: 5.016s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000 55570/1000000: episode: 81, duration: 5.431s, episode steps: 678, steps per second: 125, episode reward: 100.000, mean reward: 0.147 [0.000, 10.000], mean action: 7.981 [1.000, 8.000], mean observation: 72.906 [0.000, 228.000], loss: 0.740788, mean_absolute_error: 0.018628, acc: 0.995569, mean_q: 1.000000 56203/1000000: episode: 82, duration: 5.093s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.991 [3.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.556664, mean_absolute_error: 0.014611, acc: 0.995253, mean_q: 1.000000 56836/1000000: episode: 83, duration: 5.085s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 57452/1000000: episode: 84, duration: 4.944s, episode steps: 616, steps per second: 125, episode reward: 90.000, mean reward: 0.146 [0.000, 10.000], mean action: 7.987 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.734326, mean_absolute_error: 0.018591, acc: 0.995122, mean_q: 1.000000 58075/1000000: episode: 85, duration: 5.017s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 
[0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 58703/1000000: episode: 86, duration: 5.039s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 59333/1000000: episode: 87, duration: 5.059s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559318, mean_absolute_error: 0.014676, acc: 0.995231, mean_q: 1.000000 60127/1000000: episode: 88, duration: 6.351s, episode steps: 794, steps per second: 125, episode reward: 240.000, mean reward: 0.302 [0.000, 50.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 2.775674, mean_absolute_error: 0.036070, acc: 0.994956, mean_q: 1.000000 60755/1000000: episode: 89, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 61384/1000000: episode: 90, duration: 5.101s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 62394/1000000: episode: 91, duration: 8.084s, episode steps: 1010, steps per second: 125, episode reward: 840.000, mean reward: 0.832 [0.000, 400.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.831 [0.000, 228.000], loss: 101.281608, mean_absolute_error: 0.094000, acc: 0.998018, mean_q: 1.000000 63028/1000000: episode: 92, duration: 5.045s, episode steps: 
634, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 63627/1000000: episode: 93, duration: 4.799s, episode steps: 599, steps per second: 125, episode reward: 130.000, mean reward: 0.217 [0.000, 10.000], mean action: 7.987 [2.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 1.088979, mean_absolute_error: 0.026505, acc: 0.994983, mean_q: 1.000000 64250/1000000: episode: 94, duration: 5.007s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.970 [2.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.568796, mean_absolute_error: 0.015890, acc: 0.991961, mean_q: 1.000000 64877/1000000: episode: 95, duration: 5.039s, episode steps: 627, steps per second: 124, episode reward: 110.000, mean reward: 0.175 [0.000, 10.000], mean action: 7.987 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.880848, mean_absolute_error: 0.021826, acc: 0.995208, mean_q: 1.000000 65495/1000000: episode: 96, duration: 4.993s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.570196, mean_absolute_error: 0.014940, acc: 0.995138, mean_q: 1.000000 66127/1000000: episode: 97, duration: 5.072s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000 66757/1000000: episode: 98, duration: 5.076s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.888 [0.000, 
228.000], loss: 0.559318, mean_absolute_error: 0.014676, acc: 0.995231, mean_q: 1.000000 67386/1000000: episode: 99, duration: 5.056s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 68010/1000000: episode: 100, duration: 5.167s, episode steps: 624, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 68637/1000000: episode: 101, duration: 5.054s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000 69270/1000000: episode: 102, duration: 5.067s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 69899/1000000: episode: 103, duration: 5.061s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 70523/1000000: episode: 104, duration: 5.003s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 71134/1000000: episode: 105, duration: 4.940s, episode steps: 611, 
steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.575115, mean_absolute_error: 0.014557, acc: 0.996721, mean_q: 1.000000 71760/1000000: episode: 106, duration: 5.358s, episode steps: 626, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 72379/1000000: episode: 107, duration: 5.078s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 73008/1000000: episode: 108, duration: 5.133s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 73642/1000000: episode: 109, duration: 5.542s, episode steps: 634, steps per second: 114, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 74276/1000000: episode: 110, duration: 5.082s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 74896/1000000: episode: 111, duration: 5.017s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 
228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 75523/1000000: episode: 112, duration: 5.050s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.984 [1.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000 76145/1000000: episode: 113, duration: 5.004s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566523, mean_absolute_error: 0.014851, acc: 0.995169, mean_q: 1.000000 76763/1000000: episode: 114, duration: 5.041s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.568591, mean_absolute_error: 0.014405, acc: 0.996759, mean_q: 1.000000 77393/1000000: episode: 115, duration: 5.421s, episode steps: 630, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 78024/1000000: episode: 116, duration: 5.238s, episode steps: 631, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 78657/1000000: episode: 117, duration: 5.127s, episode steps: 633, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 79658/1000000: episode: 118, duration: 8.076s, episode steps: 1001, 
steps per second: 124, episode reward: 240.000, mean reward: 0.240 [0.000, 50.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.757 [0.000, 228.000], loss: 2.199140, mean_absolute_error: 0.028173, acc: 0.998000, mean_q: 1.000000 80290/1000000: episode: 119, duration: 5.097s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 80908/1000000: episode: 120, duration: 4.965s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 81535/1000000: episode: 121, duration: 5.057s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 82163/1000000: episode: 122, duration: 5.498s, episode steps: 628, steps per second: 114, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.978 [0.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561102, mean_absolute_error: 0.014719, acc: 0.995215, mean_q: 1.000000 82789/1000000: episode: 123, duration: 5.050s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 83384/1000000: episode: 124, duration: 4.804s, episode steps: 595, steps per second: 124, episode reward: 150.000, mean reward: 0.252 [0.000, 10.000], mean action: 7.988 [1.000, 8.000], mean observation: 72.829 [0.000, 
228.000], loss: 1.262659, mean_absolute_error: 0.029854, acc: 0.996633, mean_q: 1.000000 84011/1000000: episode: 125, duration: 5.025s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 84632/1000000: episode: 126, duration: 5.008s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 85437/1000000: episode: 127, duration: 6.496s, episode steps: 805, steps per second: 124, episode reward: 440.000, mean reward: 0.547 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.826 [0.000, 228.000], loss: 27.608373, mean_absolute_error: 0.062407, acc: 0.997512, mean_q: 1.000000 86065/1000000: episode: 128, duration: 5.578s, episode steps: 628, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 86691/1000000: episode: 129, duration: 5.067s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000 87314/1000000: episode: 130, duration: 5.019s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 87940/1000000: episode: 131, duration: 5.044s, episode steps: 
626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 88574/1000000: episode: 132, duration: 5.099s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 89208/1000000: episode: 133, duration: 5.165s, episode steps: 634, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 89844/1000000: episode: 134, duration: 5.418s, episode steps: 636, steps per second: 117, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.569782, mean_absolute_error: 0.014551, acc: 0.995276, mean_q: 1.000000 90466/1000000: episode: 135, duration: 5.059s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 91096/1000000: episode: 136, duration: 5.037s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 91723/1000000: episode: 137, duration: 5.036s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 
228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 92359/1000000: episode: 138, duration: 5.092s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 92990/1000000: episode: 139, duration: 5.032s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 93619/1000000: episode: 140, duration: 5.026s, episode steps: 629, steps per second: 125, episode reward: 80.000, mean reward: 0.127 [0.000, 10.000], mean action: 7.989 [3.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.639668, mean_absolute_error: 0.016463, acc: 0.995223, mean_q: 1.000000 94239/1000000: episode: 141, duration: 4.976s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 94856/1000000: episode: 142, duration: 4.992s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 95489/1000000: episode: 143, duration: 5.062s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 96118/1000000: episode: 144, duration: 5.067s, episode steps: 629, 
steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 96742/1000000: episode: 145, duration: 5.001s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000 97372/1000000: episode: 146, duration: 5.077s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 98010/1000000: episode: 147, duration: 5.086s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 98630/1000000: episode: 148, duration: 4.953s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000 99262/1000000: episode: 149, duration: 5.040s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000 99886/1000000: episode: 150, duration: 5.027s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.982 [0.000, 8.000], mean observation: 72.886 [0.000, 
228.000], loss: 0.564705, mean_absolute_error: 0.014806, acc: 0.995185, mean_q: 1.000000 100507/1000000: episode: 151, duration: 5.023s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 101281/1000000: episode: 152, duration: 6.234s, episode steps: 774, steps per second: 124, episode reward: 240.000, mean reward: 0.310 [0.000, 50.000], mean action: 7.991 [1.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 2.844927, mean_absolute_error: 0.036120, acc: 0.997413, mean_q: 1.000000 101909/1000000: episode: 153, duration: 5.073s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 102547/1000000: episode: 154, duration: 5.248s, episode steps: 638, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552295, mean_absolute_error: 0.014505, acc: 0.995290, mean_q: 1.000000 103176/1000000: episode: 155, duration: 5.442s, episode steps: 629, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 103790/1000000: episode: 156, duration: 4.944s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 104424/1000000: episode: 157, duration: 5.090s, episode 
steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 105061/1000000: episode: 158, duration: 5.155s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 105690/1000000: episode: 159, duration: 5.063s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 106325/1000000: episode: 160, duration: 5.105s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000 106957/1000000: episode: 161, duration: 5.073s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.975 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014284, acc: 0.995246, mean_q: 1.000000 107582/1000000: episode: 162, duration: 5.071s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 108213/1000000: episode: 163, duration: 5.382s, episode steps: 631, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 
72.887 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 108848/1000000: episode: 164, duration: 5.602s, episode steps: 635, steps per second: 113, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000 109475/1000000: episode: 165, duration: 5.399s, episode steps: 627, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 110103/1000000: episode: 166, duration: 5.044s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 110724/1000000: episode: 167, duration: 4.978s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 111353/1000000: episode: 168, duration: 5.056s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 112417/1000000: episode: 169, duration: 8.556s, episode steps: 1064, steps per second: 124, episode reward: 840.000, mean reward: 0.789 [0.000, 400.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.831 [0.000, 228.000], loss: 96.137474, mean_absolute_error: 0.089591, acc: 0.997178, mean_q: 1.000000 113042/1000000: episode: 170, duration: 
5.015s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 114392/1000000: episode: 171, duration: 10.825s, episode steps: 1350, steps per second: 125, episode reward: 240.000, mean reward: 0.178 [0.000, 50.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.824 [0.000, 228.000], loss: 1.630947, mean_absolute_error: 0.021417, acc: 0.997776, mean_q: 1.000000 115010/1000000: episode: 172, duration: 5.067s, episode steps: 618, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 115642/1000000: episode: 173, duration: 5.089s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 116268/1000000: episode: 174, duration: 5.063s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 117381/1000000: episode: 175, duration: 8.975s, episode steps: 1113, steps per second: 124, episode reward: 440.000, mean reward: 0.395 [0.000, 200.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.756 [0.000, 228.000], loss: 19.962354, mean_absolute_error: 0.045726, acc: 0.997302, mean_q: 1.000000 117997/1000000: episode: 176, duration: 4.968s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.979 [0.000, 
8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.572050, mean_absolute_error: 0.014985, acc: 0.995122, mean_q: 1.000000 118619/1000000: episode: 177, duration: 5.040s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 119240/1000000: episode: 178, duration: 5.007s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 119852/1000000: episode: 179, duration: 4.931s, episode steps: 612, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.972 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.575794, mean_absolute_error: 0.015256, acc: 0.993453, mean_q: 1.000000 120483/1000000: episode: 180, duration: 5.078s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 121106/1000000: episode: 181, duration: 5.026s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 121720/1000000: episode: 182, duration: 4.955s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.572301, mean_absolute_error: 0.014492, acc: 0.996737, mean_q: 1.000000 122356/1000000: 
episode: 183, duration: 5.142s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000 122989/1000000: episode: 184, duration: 5.098s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 123607/1000000: episode: 185, duration: 4.972s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.974 [2.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.573405, mean_absolute_error: 0.016009, acc: 0.991896, mean_q: 1.000000 124231/1000000: episode: 186, duration: 4.982s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.979 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567883, mean_absolute_error: 0.015866, acc: 0.991974, mean_q: 1.000000 124865/1000000: episode: 187, duration: 5.061s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 125494/1000000: episode: 188, duration: 5.025s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 126111/1000000: episode: 189, duration: 4.914s, episode steps: 617, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 
8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 126732/1000000: episode: 190, duration: 4.970s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 127353/1000000: episode: 191, duration: 4.976s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 127972/1000000: episode: 192, duration: 4.972s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.982 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569273, mean_absolute_error: 0.014917, acc: 0.995146, mean_q: 1.000000 128593/1000000: episode: 193, duration: 4.968s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 129226/1000000: episode: 194, duration: 5.157s, episode steps: 633, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 129841/1000000: episode: 195, duration: 4.964s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 
130939/1000000: episode: 196, duration: 8.771s, episode steps: 1098, steps per second: 125, episode reward: 240.000, mean reward: 0.219 [0.000, 50.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.774 [0.000, 228.000], loss: 2.006495, mean_absolute_error: 0.026382, acc: 0.996354, mean_q: 1.000000 132000/1000000: episode: 197, duration: 8.742s, episode steps: 1061, steps per second: 121, episode reward: 840.000, mean reward: 0.792 [0.000, 400.000], mean action: 7.991 [0.000, 8.000], mean observation: 72.836 [0.000, 228.000], loss: 96.409561, mean_absolute_error: 0.089842, acc: 0.997170, mean_q: 1.000000 132625/1000000: episode: 198, duration: 5.386s, episode steps: 625, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 133256/1000000: episode: 199, duration: 5.362s, episode steps: 631, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 133887/1000000: episode: 200, duration: 5.119s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 134530/1000000: episode: 201, duration: 5.429s, episode steps: 643, steps per second: 118, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000 135165/1000000: episode: 202, duration: 5.239s, episode steps: 635, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 135790/1000000: episode: 203, duration: 5.083s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 136406/1000000: episode: 204, duration: 4.993s, episode steps: 616, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 137023/1000000: episode: 205, duration: 4.977s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 137645/1000000: episode: 206, duration: 5.079s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.984 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566523, mean_absolute_error: 0.014851, acc: 0.995169, mean_q: 1.000000 138277/1000000: episode: 207, duration: 5.149s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000 138906/1000000: episode: 208, duration: 5.186s, episode steps: 629, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, 
mean_q: 1.000000 139516/1000000: episode: 209, duration: 4.976s, episode steps: 610, steps per second: 123, episode reward: 90.000, mean reward: 0.148 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.739935, mean_absolute_error: 0.018221, acc: 0.996716, mean_q: 1.000000 140147/1000000: episode: 210, duration: 5.084s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000 140777/1000000: episode: 211, duration: 5.093s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 141392/1000000: episode: 212, duration: 4.945s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.571369, mean_absolute_error: 0.014470, acc: 0.996743, mean_q: 1.000000 142024/1000000: episode: 213, duration: 5.236s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 142656/1000000: episode: 214, duration: 5.340s, episode steps: 632, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 143290/1000000: episode: 215, duration: 5.075s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 
[0.000, 10.000], mean action: 7.989 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555784, mean_absolute_error: 0.014590, acc: 0.995261, mean_q: 1.000000 143917/1000000: episode: 216, duration: 5.041s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 144551/1000000: episode: 217, duration: 5.366s, episode steps: 634, steps per second: 118, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 145178/1000000: episode: 218, duration: 5.035s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 145814/1000000: episode: 219, duration: 5.123s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000 146450/1000000: episode: 220, duration: 5.547s, episode steps: 636, steps per second: 115, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 147069/1000000: episode: 221, duration: 5.003s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 
0.998382, mean_q: 1.000000 147701/1000000: episode: 222, duration: 5.100s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 148326/1000000: episode: 223, duration: 5.040s, episode steps: 625, steps per second: 124, episode reward: 110.000, mean reward: 0.176 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.882085, mean_absolute_error: 0.021364, acc: 0.996795, mean_q: 1.000000 148951/1000000: episode: 224, duration: 5.413s, episode steps: 625, steps per second: 115, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 149580/1000000: episode: 225, duration: 5.214s, episode steps: 629, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 150212/1000000: episode: 226, duration: 5.042s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [4.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557546, mean_absolute_error: 0.014633, acc: 0.995246, mean_q: 1.000000 150840/1000000: episode: 227, duration: 5.027s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 151461/1000000: episode: 228, duration: 4.970s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean 
reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 152083/1000000: episode: 229, duration: 4.969s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [5.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.566523, mean_absolute_error: 0.014851, acc: 0.995169, mean_q: 1.000000 152707/1000000: episode: 230, duration: 4.987s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 153335/1000000: episode: 231, duration: 5.008s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 153962/1000000: episode: 232, duration: 4.987s, episode steps: 627, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 154591/1000000: episode: 233, duration: 5.009s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 155211/1000000: episode: 234, duration: 4.957s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 
0.013829, acc: 0.998384, mean_q: 1.000000 155839/1000000: episode: 235, duration: 5.117s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 156483/1000000: episode: 236, duration: 5.255s, episode steps: 644, steps per second: 123, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.545602, mean_absolute_error: 0.013867, acc: 0.996890, mean_q: 1.000000 157111/1000000: episode: 237, duration: 5.084s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 157740/1000000: episode: 238, duration: 5.172s, episode steps: 629, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 158377/1000000: episode: 239, duration: 5.128s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 159009/1000000: episode: 240, duration: 5.082s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.978 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557546, mean_absolute_error: 0.014633, acc: 0.995246, mean_q: 1.000000 159622/1000000: episode: 241, duration: 4.967s, episode steps: 613, steps per second: 123, episode reward: 
100.000, mean reward: 0.163 [0.000, 10.000], mean action: 7.992 [5.000, 8.000], mean observation: 72.897 [0.000, 228.000], loss: 0.819461, mean_absolute_error: 0.020488, acc: 0.995098, mean_q: 1.000000 160254/1000000: episode: 242, duration: 5.505s, episode steps: 632, steps per second: 115, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 160896/1000000: episode: 243, duration: 5.143s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.988 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.548848, mean_absolute_error: 0.014422, acc: 0.995320, mean_q: 1.000000 161535/1000000: episode: 244, duration: 5.120s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 162174/1000000: episode: 245, duration: 5.199s, episode steps: 639, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.549877, mean_absolute_error: 0.013967, acc: 0.996865, mean_q: 1.000000 162814/1000000: episode: 246, duration: 5.545s, episode steps: 640, steps per second: 115, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 163444/1000000: episode: 247, duration: 5.089s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.556171, 
mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 164064/1000000: episode: 248, duration: 5.001s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 164695/1000000: episode: 249, duration: 5.401s, episode steps: 631, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.983 [2.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000 165722/1000000: episode: 250, duration: 8.493s, episode steps: 1027, steps per second: 121, episode reward: 240.000, mean reward: 0.234 [0.000, 50.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.760 [0.000, 228.000], loss: 2.143412, mean_absolute_error: 0.027488, acc: 0.998051, mean_q: 1.000000 166352/1000000: episode: 251, duration: 5.105s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 166980/1000000: episode: 252, duration: 5.066s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 167610/1000000: episode: 253, duration: 5.072s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 168238/1000000: episode: 254, duration: 5.542s, episode steps: 628, steps per second: 
113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 169548/1000000: episode: 255, duration: 10.496s, episode steps: 1310, steps per second: 125, episode reward: 240.000, mean reward: 0.183 [0.000, 50.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.820 [0.000, 228.000], loss: 1.680783, mean_absolute_error: 0.022037, acc: 0.997708, mean_q: 1.000000 170163/1000000: episode: 256, duration: 4.966s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 171066/1000000: episode: 257, duration: 7.415s, episode steps: 903, steps per second: 122, episode reward: 440.000, mean reward: 0.487 [0.000, 200.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.805 [0.000, 228.000], loss: 24.610994, mean_absolute_error: 0.056479, acc: 0.995565, mean_q: 1.000000 171702/1000000: episode: 258, duration: 5.216s, episode steps: 636, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 172315/1000000: episode: 259, duration: 5.027s, episode steps: 613, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000 172947/1000000: episode: 260, duration: 5.078s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.891 [0.000, 228.000], 
loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000 173572/1000000: episode: 261, duration: 5.057s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 174202/1000000: episode: 262, duration: 5.199s, episode steps: 630, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 174827/1000000: episode: 263, duration: 5.225s, episode steps: 625, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 175455/1000000: episode: 264, duration: 5.327s, episode steps: 628, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 176073/1000000: episode: 265, duration: 5.043s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 176697/1000000: episode: 266, duration: 5.463s, episode steps: 624, steps per second: 114, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 177327/1000000: episode: 267, duration: 5.070s, episode steps: 630, 
steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 177951/1000000: episode: 268, duration: 5.337s, episode steps: 624, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000 178576/1000000: episode: 269, duration: 5.124s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 179223/1000000: episode: 270, duration: 5.238s, episode steps: 647, steps per second: 124, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.541536, mean_absolute_error: 0.013297, acc: 0.998452, mean_q: 1.000000 179841/1000000: episode: 271, duration: 5.042s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.568591, mean_absolute_error: 0.014405, acc: 0.996759, mean_q: 1.000000 180471/1000000: episode: 272, duration: 5.093s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 181092/1000000: episode: 273, duration: 5.049s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 
228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 181718/1000000: episode: 274, duration: 5.399s, episode steps: 626, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 182340/1000000: episode: 275, duration: 5.495s, episode steps: 622, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 182968/1000000: episode: 276, duration: 5.035s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 183595/1000000: episode: 277, duration: 5.329s, episode steps: 627, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.978 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000 184226/1000000: episode: 278, duration: 5.953s, episode steps: 631, steps per second: 106, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 184851/1000000: episode: 279, duration: 5.952s, episode steps: 625, steps per second: 105, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.563800, mean_absolute_error: 0.014785, acc: 0.995192, mean_q: 1.000000 185479/1000000: episode: 280, duration: 5.378s, episode steps: 
628, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 186335/1000000: episode: 281, duration: 7.031s, episode steps: 856, steps per second: 122, episode reward: 840.000, mean reward: 0.981 [0.000, 400.000], mean action: 7.988 [1.000, 8.000], mean observation: 72.803 [0.000, 228.000], loss: 119.525292, mean_absolute_error: 0.111116, acc: 0.996491, mean_q: 1.000000 186957/1000000: episode: 282, duration: 5.255s, episode steps: 622, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 187585/1000000: episode: 283, duration: 5.133s, episode steps: 628, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 188212/1000000: episode: 284, duration: 5.294s, episode steps: 627, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 188844/1000000: episode: 285, duration: 5.435s, episode steps: 632, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000 189475/1000000: episode: 286, duration: 5.136s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 
[0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 190097/1000000: episode: 287, duration: 5.067s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.881 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 190729/1000000: episode: 288, duration: 5.240s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 191360/1000000: episode: 289, duration: 5.093s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 191982/1000000: episode: 290, duration: 5.022s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 192614/1000000: episode: 291, duration: 5.366s, episode steps: 632, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 193233/1000000: episode: 292, duration: 5.191s, episode steps: 619, steps per second: 119, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 193849/1000000: episode: 293, duration: 4.960s, 
episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 194470/1000000: episode: 294, duration: 5.017s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 195103/1000000: episode: 295, duration: 5.076s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 196054/1000000: episode: 296, duration: 7.657s, episode steps: 951, steps per second: 124, episode reward: 840.000, mean reward: 0.883 [0.000, 400.000], mean action: 7.989 [0.000, 8.000], mean observation: 72.832 [0.000, 228.000], loss: 107.572768, mean_absolute_error: 0.100116, acc: 0.996842, mean_q: 1.000000 196682/1000000: episode: 297, duration: 5.044s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 197324/1000000: episode: 298, duration: 5.166s, episode steps: 642, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000 197959/1000000: episode: 299, duration: 5.185s, episode steps: 635, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 198574/1000000: episode: 300, duration: 4.956s, episode steps: 615, steps per second: 124, episode reward: 80.000, mean reward: 0.130 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.901 [0.000, 228.000], loss: 0.652639, mean_absolute_error: 0.016276, acc: 0.996743, mean_q: 1.000000 199195/1000000: episode: 301, duration: 4.992s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 199841/1000000: episode: 302, duration: 5.200s, episode steps: 646, steps per second: 124, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.542375, mean_absolute_error: 0.013316, acc: 0.998450, mean_q: 1.000000 200458/1000000: episode: 303, duration: 4.978s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 201079/1000000: episode: 304, duration: 5.025s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 201707/1000000: episode: 305, duration: 5.038s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 202340/1000000: episode: 306, 
duration: 5.087s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 202979/1000000: episode: 307, duration: 5.193s, episode steps: 639, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549877, mean_absolute_error: 0.013967, acc: 0.996865, mean_q: 1.000000 203610/1000000: episode: 308, duration: 5.070s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 204248/1000000: episode: 309, duration: 5.123s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 204871/1000000: episode: 310, duration: 5.003s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 205504/1000000: episode: 311, duration: 5.076s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 206128/1000000: episode: 312, duration: 5.025s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 
8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000 206761/1000000: episode: 313, duration: 5.112s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 207374/1000000: episode: 314, duration: 4.939s, episode steps: 613, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.573236, mean_absolute_error: 0.014513, acc: 0.996732, mean_q: 1.000000 208003/1000000: episode: 315, duration: 5.061s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 208635/1000000: episode: 316, duration: 5.066s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000 209257/1000000: episode: 317, duration: 4.981s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 209884/1000000: episode: 318, duration: 5.016s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 210511/1000000: 
episode: 319, duration: 5.018s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 211128/1000000: episode: 320, duration: 4.947s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 211996/1000000: episode: 321, duration: 7.001s, episode steps: 868, steps per second: 124, episode reward: 440.000, mean reward: 0.507 [0.000, 200.000], mean action: 7.992 [1.000, 8.000], mean observation: 72.796 [0.000, 228.000], loss: 25.602232, mean_absolute_error: 0.057953, acc: 0.997693, mean_q: 1.000000 212619/1000000: episode: 322, duration: 4.981s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 213241/1000000: episode: 323, duration: 4.954s, episode steps: 622, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 213861/1000000: episode: 324, duration: 4.952s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000 214472/1000000: episode: 325, duration: 4.930s, episode steps: 611, steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 
7.992 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.575115, mean_absolute_error: 0.014557, acc: 0.996721, mean_q: 1.000000 215094/1000000: episode: 326, duration: 5.080s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 215731/1000000: episode: 327, duration: 5.299s, episode steps: 637, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 216368/1000000: episode: 328, duration: 5.182s, episode steps: 637, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000 216999/1000000: episode: 329, duration: 5.186s, episode steps: 631, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000 217642/1000000: episode: 330, duration: 5.200s, episode steps: 643, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000 218267/1000000: episode: 331, duration: 5.065s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 
218885/1000000: episode: 332, duration: 4.982s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 219517/1000000: episode: 333, duration: 5.194s, episode steps: 632, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 220146/1000000: episode: 334, duration: 5.573s, episode steps: 629, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 220783/1000000: episode: 335, duration: 5.128s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 221398/1000000: episode: 336, duration: 4.969s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 222022/1000000: episode: 337, duration: 5.033s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 222667/1000000: episode: 338, duration: 5.188s, episode steps: 645, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], 
mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.544755, mean_absolute_error: 0.013847, acc: 0.996894, mean_q: 1.000000 223286/1000000: episode: 339, duration: 4.974s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 223909/1000000: episode: 340, duration: 5.055s, episode steps: 623, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000 224535/1000000: episode: 341, duration: 5.567s, episode steps: 626, steps per second: 112, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 225161/1000000: episode: 342, duration: 5.067s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 225792/1000000: episode: 343, duration: 5.105s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 226425/1000000: episode: 344, duration: 5.111s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 
1.000000 227049/1000000: episode: 345, duration: 5.057s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 227664/1000000: episode: 346, duration: 5.398s, episode steps: 615, steps per second: 114, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.571369, mean_absolute_error: 0.014470, acc: 0.996743, mean_q: 1.000000 228290/1000000: episode: 347, duration: 5.051s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 228919/1000000: episode: 348, duration: 5.061s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 229539/1000000: episode: 349, duration: 5.053s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 230176/1000000: episode: 350, duration: 5.228s, episode steps: 637, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 230803/1000000: episode: 351, duration: 5.228s, episode steps: 627, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 
10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 231431/1000000: episode: 352, duration: 5.101s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 232075/1000000: episode: 353, duration: 5.187s, episode steps: 644, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.544062, mean_absolute_error: 0.013354, acc: 0.998445, mean_q: 1.000000 232705/1000000: episode: 354, duration: 5.275s, episode steps: 630, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 233340/1000000: episode: 355, duration: 5.577s, episode steps: 635, steps per second: 114, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 233976/1000000: episode: 356, duration: 5.112s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 234609/1000000: episode: 357, duration: 5.084s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, 
mean_q: 1.000000 235243/1000000: episode: 358, duration: 5.096s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 235880/1000000: episode: 359, duration: 5.086s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000 236495/1000000: episode: 360, duration: 4.940s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 237117/1000000: episode: 361, duration: 4.982s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 237742/1000000: episode: 362, duration: 5.050s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 238384/1000000: episode: 363, duration: 5.150s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000 239018/1000000: episode: 364, duration: 5.270s, episode steps: 634, steps per second: 120, episode reward: 70.000, mean reward: 0.110 
[0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 239635/1000000: episode: 365, duration: 5.092s, episode steps: 617, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 240261/1000000: episode: 366, duration: 5.055s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 240897/1000000: episode: 367, duration: 5.117s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 241527/1000000: episode: 368, duration: 5.082s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 242157/1000000: episode: 369, duration: 5.149s, episode steps: 630, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 242779/1000000: episode: 370, duration: 5.526s, episode steps: 622, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 
0.998390, mean_q: 1.000000 243415/1000000: episode: 371, duration: 5.129s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000 244045/1000000: episode: 372, duration: 5.043s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 244678/1000000: episode: 373, duration: 5.074s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 245311/1000000: episode: 374, duration: 5.088s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 245946/1000000: episode: 375, duration: 5.082s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 246569/1000000: episode: 376, duration: 5.018s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 247194/1000000: episode: 377, duration: 5.009s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean 
reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 247806/1000000: episode: 378, duration: 5.016s, episode steps: 612, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000 248430/1000000: episode: 379, duration: 5.024s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 249069/1000000: episode: 380, duration: 5.133s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 249704/1000000: episode: 381, duration: 5.091s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 250352/1000000: episode: 382, duration: 5.635s, episode steps: 648, steps per second: 115, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.540699, mean_absolute_error: 0.013278, acc: 0.998454, mean_q: 1.000000 250984/1000000: episode: 383, duration: 5.125s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 
0.013587, acc: 0.998415, mean_q: 1.000000 251624/1000000: episode: 384, duration: 5.163s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 252255/1000000: episode: 385, duration: 5.113s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 252875/1000000: episode: 386, duration: 5.494s, episode steps: 620, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 253494/1000000: episode: 387, duration: 5.039s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 254124/1000000: episode: 388, duration: 5.073s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 254747/1000000: episode: 389, duration: 5.040s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 255365/1000000: episode: 390, duration: 4.992s, episode steps: 618, steps per second: 124, episode reward: 
70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 256003/1000000: episode: 391, duration: 5.739s, episode steps: 638, steps per second: 111, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550741, mean_absolute_error: 0.013987, acc: 0.996860, mean_q: 1.000000 256626/1000000: episode: 392, duration: 5.036s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 257246/1000000: episode: 393, duration: 5.040s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 257875/1000000: episode: 394, duration: 5.057s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 258489/1000000: episode: 395, duration: 4.983s, episode steps: 614, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 259121/1000000: episode: 396, duration: 5.088s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, 
mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 259750/1000000: episode: 397, duration: 5.084s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 260599/1000000: episode: 398, duration: 7.239s, episode steps: 849, steps per second: 117, episode reward: 440.000, mean reward: 0.518 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.798 [0.000, 228.000], loss: 26.175866, mean_absolute_error: 0.059227, acc: 0.997642, mean_q: 1.000000 261235/1000000: episode: 399, duration: 5.146s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 261864/1000000: episode: 400, duration: 5.100s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 262494/1000000: episode: 401, duration: 5.576s, episode steps: 630, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 263123/1000000: episode: 402, duration: 5.129s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 263747/1000000: episode: 403, duration: 5.065s, episode steps: 624, steps per second: 
123, episode reward: 110.000, mean reward: 0.176 [0.000, 10.000], mean action: 7.968 [0.000, 8.000], mean observation: 72.902 [0.000, 228.000], loss: 0.886679, mean_absolute_error: 0.022456, acc: 0.993579, mean_q: 1.000000 264377/1000000: episode: 404, duration: 5.337s, episode steps: 630, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 265002/1000000: episode: 405, duration: 5.074s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 265628/1000000: episode: 406, duration: 5.087s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 266261/1000000: episode: 407, duration: 5.452s, episode steps: 633, steps per second: 116, episode reward: 160.000, mean reward: 0.253 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 1.267265, mean_absolute_error: 0.030403, acc: 0.995253, mean_q: 1.000000 266897/1000000: episode: 408, duration: 5.177s, episode steps: 636, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 267525/1000000: episode: 409, duration: 5.062s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 
0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 268148/1000000: episode: 410, duration: 5.013s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 268777/1000000: episode: 411, duration: 5.082s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 269418/1000000: episode: 412, duration: 5.161s, episode steps: 641, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 270044/1000000: episode: 413, duration: 5.400s, episode steps: 626, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 270669/1000000: episode: 414, duration: 5.024s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 271293/1000000: episode: 415, duration: 5.059s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000 271921/1000000: episode: 416, duration: 5.031s, episode steps: 628, steps per 
second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 272549/1000000: episode: 417, duration: 5.034s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 273176/1000000: episode: 418, duration: 5.001s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 273810/1000000: episode: 419, duration: 5.068s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 274448/1000000: episode: 420, duration: 5.108s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550741, mean_absolute_error: 0.013987, acc: 0.996860, mean_q: 1.000000 275081/1000000: episode: 421, duration: 5.060s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 275713/1000000: episode: 422, duration: 5.039s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], 
loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 276341/1000000: episode: 423, duration: 5.010s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 276962/1000000: episode: 424, duration: 4.966s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 277586/1000000: episode: 425, duration: 5.014s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 278206/1000000: episode: 426, duration: 5.003s, episode steps: 620, steps per second: 124, episode reward: 110.000, mean reward: 0.177 [0.000, 10.000], mean action: 7.977 [1.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.892408, mean_absolute_error: 0.022594, acc: 0.993538, mean_q: 1.000000 278825/1000000: episode: 427, duration: 4.996s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 279443/1000000: episode: 428, duration: 4.969s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 280072/1000000: episode: 429, duration: 5.550s, episode steps: 629, 
steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.979 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560209, mean_absolute_error: 0.014697, acc: 0.995223, mean_q: 1.000000 280693/1000000: episode: 430, duration: 5.040s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 281315/1000000: episode: 431, duration: 5.010s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 281953/1000000: episode: 432, duration: 5.151s, episode steps: 638, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 282571/1000000: episode: 433, duration: 4.977s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 283187/1000000: episode: 434, duration: 5.054s, episode steps: 616, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 283806/1000000: episode: 435, duration: 5.227s, episode steps: 619, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 
228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 284440/1000000: episode: 436, duration: 5.344s, episode steps: 634, steps per second: 119, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 285066/1000000: episode: 437, duration: 5.237s, episode steps: 626, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 285690/1000000: episode: 438, duration: 5.068s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 286314/1000000: episode: 439, duration: 5.131s, episode steps: 624, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 286937/1000000: episode: 440, duration: 5.286s, episode steps: 623, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 287766/1000000: episode: 441, duration: 6.812s, episode steps: 829, steps per second: 122, episode reward: 440.000, mean reward: 0.531 [0.000, 200.000], mean action: 7.983 [1.000, 8.000], mean observation: 72.811 [0.000, 228.000], loss: 26.809328, mean_absolute_error: 0.061029, acc: 0.996377, mean_q: 1.000000 288389/1000000: episode: 442, duration: 4.997s, episode 
steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 289021/1000000: episode: 443, duration: 5.151s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 289636/1000000: episode: 444, duration: 4.987s, episode steps: 615, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 290263/1000000: episode: 445, duration: 5.076s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 290897/1000000: episode: 446, duration: 5.146s, episode steps: 634, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 291571/1000000: episode: 447, duration: 5.509s, episode steps: 674, steps per second: 122, episode reward: 100.000, mean reward: 0.148 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.902 [0.000, 228.000], loss: 0.743720, mean_absolute_error: 0.018242, acc: 0.997028, mean_q: 1.000000 292204/1000000: episode: 448, duration: 5.101s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 
72.890 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 292816/1000000: episode: 449, duration: 5.003s, episode steps: 612, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000 293444/1000000: episode: 450, duration: 5.056s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 294068/1000000: episode: 451, duration: 5.020s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 294690/1000000: episode: 452, duration: 5.034s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 295315/1000000: episode: 453, duration: 5.032s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 295938/1000000: episode: 454, duration: 5.026s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 296569/1000000: episode: 455, duration: 5.189s, 
episode steps: 631, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 297201/1000000: episode: 456, duration: 5.109s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 297832/1000000: episode: 457, duration: 5.092s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 298459/1000000: episode: 458, duration: 5.030s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 299079/1000000: episode: 459, duration: 4.963s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 299704/1000000: episode: 460, duration: 5.127s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 300351/1000000: episode: 461, duration: 5.223s, episode steps: 647, steps per second: 124, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.891 [0.000, 228.000], loss: 0.541536, mean_absolute_error: 0.013297, acc: 0.998452, mean_q: 1.000000 300930/1000000: episode: 462, duration: 4.676s, episode steps: 579, steps per second: 124, episode reward: 160.000, mean reward: 0.276 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.815 [0.000, 228.000], loss: 1.383942, mean_absolute_error: 0.032568, acc: 0.996540, mean_q: 1.000000 301552/1000000: episode: 463, duration: 5.009s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 302175/1000000: episode: 464, duration: 5.008s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 302800/1000000: episode: 465, duration: 5.454s, episode steps: 625, steps per second: 115, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 303439/1000000: episode: 466, duration: 5.279s, episode steps: 639, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 304062/1000000: episode: 467, duration: 5.113s, episode steps: 623, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 304702/1000000: episode: 468, 
duration: 5.267s, episode steps: 640, steps per second: 122, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 305327/1000000: episode: 469, duration: 5.089s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 305956/1000000: episode: 470, duration: 5.124s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 306567/1000000: episode: 471, duration: 4.988s, episode steps: 611, steps per second: 123, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000 307195/1000000: episode: 472, duration: 5.196s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 307830/1000000: episode: 473, duration: 5.086s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 308460/1000000: episode: 474, duration: 5.046s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 
8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 309090/1000000: episode: 475, duration: 5.054s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 309723/1000000: episode: 476, duration: 5.068s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 310348/1000000: episode: 477, duration: 5.016s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 310972/1000000: episode: 478, duration: 5.014s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 311604/1000000: episode: 479, duration: 5.109s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 312225/1000000: episode: 480, duration: 5.045s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 312846/1000000: 
episode: 481, duration: 5.099s, episode steps: 621, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 313470/1000000: episode: 482, duration: 5.098s, episode steps: 624, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 314097/1000000: episode: 483, duration: 5.119s, episode steps: 627, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 314727/1000000: episode: 484, duration: 5.103s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 315347/1000000: episode: 485, duration: 4.983s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 315980/1000000: episode: 486, duration: 5.123s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 316601/1000000: episode: 487, duration: 5.069s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 
8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 317238/1000000: episode: 488, duration: 5.141s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 317863/1000000: episode: 489, duration: 5.097s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 318490/1000000: episode: 490, duration: 5.051s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 319115/1000000: episode: 491, duration: 5.017s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 319743/1000000: episode: 492, duration: 5.080s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 320365/1000000: episode: 493, duration: 5.000s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 
320994/1000000: episode: 494, duration: 5.039s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 321621/1000000: episode: 495, duration: 5.010s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 322249/1000000: episode: 496, duration: 5.010s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 322874/1000000: episode: 497, duration: 4.998s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 323505/1000000: episode: 498, duration: 5.044s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 324127/1000000: episode: 499, duration: 4.982s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 324761/1000000: episode: 500, duration: 5.074s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], 
mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 325394/1000000: episode: 501, duration: 5.064s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 326025/1000000: episode: 502, duration: 5.075s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 326648/1000000: episode: 503, duration: 4.995s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 327269/1000000: episode: 504, duration: 4.964s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 327899/1000000: episode: 505, duration: 5.015s, episode steps: 630, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 328540/1000000: episode: 506, duration: 5.141s, episode steps: 641, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 
1.000000 329170/1000000: episode: 507, duration: 5.214s, episode steps: 630, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 329802/1000000: episode: 508, duration: 5.098s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000 330430/1000000: episode: 509, duration: 5.058s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 331055/1000000: episode: 510, duration: 5.052s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 331690/1000000: episode: 511, duration: 5.087s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 332327/1000000: episode: 512, duration: 5.087s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 332955/1000000: episode: 513, duration: 5.039s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 333574/1000000: episode: 514, duration: 4.980s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 334204/1000000: episode: 515, duration: 5.081s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 334836/1000000: episode: 516, duration: 5.088s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 335461/1000000: episode: 517, duration: 5.001s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 336082/1000000: episode: 518, duration: 4.993s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 336714/1000000: episode: 519, duration: 5.087s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, 
mean_q: 1.000000 337342/1000000: episode: 520, duration: 5.039s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 337968/1000000: episode: 521, duration: 5.030s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 338598/1000000: episode: 522, duration: 5.135s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 339231/1000000: episode: 523, duration: 5.087s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 339855/1000000: episode: 524, duration: 4.992s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.580756, mean_absolute_error: 0.014810, acc: 0.995185, mean_q: 1.000000 340481/1000000: episode: 525, duration: 5.063s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 341112/1000000: episode: 526, duration: 5.080s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 
[0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 341751/1000000: episode: 527, duration: 5.150s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 342382/1000000: episode: 528, duration: 5.081s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 343001/1000000: episode: 529, duration: 4.985s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 343632/1000000: episode: 530, duration: 5.079s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 344256/1000000: episode: 531, duration: 5.052s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 344887/1000000: episode: 532, duration: 5.119s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 
0.996825, mean_q: 1.000000 345502/1000000: episode: 533, duration: 5.064s, episode steps: 615, steps per second: 121, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 346122/1000000: episode: 534, duration: 5.015s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 346977/1000000: episode: 535, duration: 6.916s, episode steps: 855, steps per second: 124, episode reward: 440.000, mean reward: 0.515 [0.000, 200.000], mean action: 7.992 [1.000, 8.000], mean observation: 72.791 [0.000, 228.000], loss: 25.991961, mean_absolute_error: 0.058819, acc: 0.997658, mean_q: 1.000000 347614/1000000: episode: 536, duration: 5.115s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000 348239/1000000: episode: 537, duration: 5.053s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 348865/1000000: episode: 538, duration: 5.234s, episode steps: 626, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.982 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000 349491/1000000: episode: 539, duration: 5.049s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean 
reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 350119/1000000: episode: 540, duration: 5.128s, episode steps: 628, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 350746/1000000: episode: 541, duration: 5.324s, episode steps: 627, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 351379/1000000: episode: 542, duration: 5.089s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 352011/1000000: episode: 543, duration: 5.302s, episode steps: 632, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 352639/1000000: episode: 544, duration: 5.489s, episode steps: 628, steps per second: 114, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 353273/1000000: episode: 545, duration: 5.370s, episode steps: 634, steps per second: 118, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 
0.013547, acc: 0.998420, mean_q: 1.000000 353905/1000000: episode: 546, duration: 5.715s, episode steps: 632, steps per second: 111, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 354533/1000000: episode: 547, duration: 5.850s, episode steps: 628, steps per second: 107, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 355158/1000000: episode: 548, duration: 5.628s, episode steps: 625, steps per second: 111, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 355784/1000000: episode: 549, duration: 5.390s, episode steps: 626, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 356407/1000000: episode: 550, duration: 5.117s, episode steps: 623, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000 357036/1000000: episode: 551, duration: 5.322s, episode steps: 629, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 357670/1000000: episode: 552, duration: 5.303s, episode steps: 634, steps per second: 120, episode reward: 
70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 358290/1000000: episode: 553, duration: 5.034s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 358917/1000000: episode: 554, duration: 5.155s, episode steps: 627, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 359554/1000000: episode: 555, duration: 5.177s, episode steps: 637, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 360173/1000000: episode: 556, duration: 5.038s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 360794/1000000: episode: 557, duration: 5.209s, episode steps: 621, steps per second: 119, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 361414/1000000: episode: 558, duration: 5.088s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565155, 
mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 362045/1000000: episode: 559, duration: 5.248s, episode steps: 631, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 362667/1000000: episode: 560, duration: 5.265s, episode steps: 622, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 363297/1000000: episode: 561, duration: 5.137s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 363927/1000000: episode: 562, duration: 5.174s, episode steps: 630, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 364555/1000000: episode: 563, duration: 5.050s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 365195/1000000: episode: 564, duration: 5.211s, episode steps: 640, steps per second: 123, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 366341/1000000: episode: 565, duration: 9.548s, episode steps: 1146, steps per second: 
120, episode reward: 1640.000, mean reward: 1.431 [0.000, 800.000], mean action: 7.994 [1.000, 8.000], mean observation: 72.846 [0.000, 228.000], loss: 368.720654, mean_absolute_error: 0.160597, acc: 0.998253, mean_q: 1.000000 366964/1000000: episode: 566, duration: 5.001s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 367594/1000000: episode: 567, duration: 5.436s, episode steps: 630, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 368225/1000000: episode: 568, duration: 5.448s, episode steps: 631, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 368850/1000000: episode: 569, duration: 5.271s, episode steps: 625, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 369472/1000000: episode: 570, duration: 5.233s, episode steps: 622, steps per second: 119, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 370308/1000000: episode: 571, duration: 6.877s, episode steps: 836, steps per second: 122, episode reward: 840.000, mean reward: 1.005 [0.000, 400.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.809 [0.000, 228.000], 
loss: 122.386986, mean_absolute_error: 0.113356, acc: 0.997605, mean_q: 1.000000 370933/1000000: episode: 572, duration: 5.348s, episode steps: 625, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 371553/1000000: episode: 573, duration: 4.992s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 372502/1000000: episode: 574, duration: 8.149s, episode steps: 949, steps per second: 116, episode reward: 840.000, mean reward: 0.885 [0.000, 400.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 107.798670, mean_absolute_error: 0.099977, acc: 0.997890, mean_q: 1.000000 373143/1000000: episode: 575, duration: 8.422s, episode steps: 641, steps per second: 76, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.988 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.548159, mean_absolute_error: 0.013927, acc: 0.996875, mean_q: 1.000000 373778/1000000: episode: 576, duration: 5.268s, episode steps: 635, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 374397/1000000: episode: 577, duration: 5.106s, episode steps: 619, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 375033/1000000: episode: 578, duration: 5.828s, episode steps: 636, 
steps per second: 109, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 375668/1000000: episode: 579, duration: 5.309s, episode steps: 635, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 376286/1000000: episode: 580, duration: 5.011s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 376909/1000000: episode: 581, duration: 5.000s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 377534/1000000: episode: 582, duration: 5.026s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 378178/1000000: episode: 583, duration: 5.163s, episode steps: 644, steps per second: 125, episode reward: 100.000, mean reward: 0.155 [0.000, 10.000], mean action: 7.995 [6.000, 8.000], mean observation: 72.900 [0.000, 228.000], loss: 0.779956, mean_absolute_error: 0.019554, acc: 0.995334, mean_q: 1.000000 378800/1000000: episode: 584, duration: 5.003s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 
228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 379431/1000000: episode: 585, duration: 5.141s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 380060/1000000: episode: 586, duration: 5.310s, episode steps: 629, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 380684/1000000: episode: 587, duration: 5.202s, episode steps: 624, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 381302/1000000: episode: 588, duration: 5.308s, episode steps: 618, steps per second: 116, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.570196, mean_absolute_error: 0.014940, acc: 0.995138, mean_q: 1.000000 381919/1000000: episode: 589, duration: 4.981s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 382535/1000000: episode: 590, duration: 5.109s, episode steps: 616, steps per second: 121, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 383161/1000000: episode: 591, duration: 5.124s, episode steps: 
626, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 383800/1000000: episode: 592, duration: 5.140s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 384426/1000000: episode: 593, duration: 5.059s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 385050/1000000: episode: 594, duration: 5.107s, episode steps: 624, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 385676/1000000: episode: 595, duration: 5.246s, episode steps: 626, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000 386292/1000000: episode: 596, duration: 5.130s, episode steps: 616, steps per second: 120, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 386916/1000000: episode: 597, duration: 5.240s, episode steps: 624, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 
[0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 387536/1000000: episode: 598, duration: 5.261s, episode steps: 620, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 388164/1000000: episode: 599, duration: 5.190s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 388786/1000000: episode: 600, duration: 5.254s, episode steps: 622, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 389406/1000000: episode: 601, duration: 5.019s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 390033/1000000: episode: 602, duration: 5.024s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 390660/1000000: episode: 603, duration: 5.017s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 391278/1000000: episode: 604, duration: 4.958s, 
episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 391910/1000000: episode: 605, duration: 5.051s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 392522/1000000: episode: 606, duration: 4.908s, episode steps: 612, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.993 [4.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.574174, mean_absolute_error: 0.014535, acc: 0.996727, mean_q: 1.000000 393146/1000000: episode: 607, duration: 5.037s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 393764/1000000: episode: 608, duration: 5.102s, episode steps: 618, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 394394/1000000: episode: 609, duration: 5.254s, episode steps: 630, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 395020/1000000: episode: 610, duration: 5.135s, episode steps: 626, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 395643/1000000: episode: 611, duration: 5.160s, episode steps: 623, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 396276/1000000: episode: 612, duration: 5.272s, episode steps: 633, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 397118/1000000: episode: 613, duration: 6.843s, episode steps: 842, steps per second: 123, episode reward: 840.000, mean reward: 0.998 [0.000, 400.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.818 [0.000, 228.000], loss: 121.513833, mean_absolute_error: 0.112555, acc: 0.997622, mean_q: 1.000000 397747/1000000: episode: 614, duration: 5.107s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 398390/1000000: episode: 615, duration: 5.284s, episode steps: 643, steps per second: 122, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000 399033/1000000: episode: 616, duration: 5.232s, episode steps: 643, steps per second: 123, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000 399655/1000000: episode: 617, 
duration: 5.092s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 400282/1000000: episode: 618, duration: 5.108s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 400920/1000000: episode: 619, duration: 5.172s, episode steps: 638, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552295, mean_absolute_error: 0.014505, acc: 0.995290, mean_q: 1.000000 401544/1000000: episode: 620, duration: 5.020s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 402179/1000000: episode: 621, duration: 5.081s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 402811/1000000: episode: 622, duration: 5.078s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 403433/1000000: episode: 623, duration: 5.001s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 
8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 404534/1000000: episode: 624, duration: 8.875s, episode steps: 1101, steps per second: 124, episode reward: 440.000, mean reward: 0.400 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.805 [0.000, 228.000], loss: 20.179224, mean_absolute_error: 0.045913, acc: 0.998182, mean_q: 1.000000 405166/1000000: episode: 625, duration: 5.131s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 405791/1000000: episode: 626, duration: 5.127s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 406410/1000000: episode: 627, duration: 5.040s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000 407034/1000000: episode: 628, duration: 5.112s, episode steps: 624, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 407651/1000000: episode: 629, duration: 5.243s, episode steps: 617, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 408282/1000000: 
episode: 630, duration: 5.259s, episode steps: 631, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 408898/1000000: episode: 631, duration: 4.956s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 409523/1000000: episode: 632, duration: 5.044s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 410149/1000000: episode: 633, duration: 5.032s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 410775/1000000: episode: 634, duration: 5.006s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 411375/1000000: episode: 635, duration: 4.815s, episode steps: 600, steps per second: 125, episode reward: 130.000, mean reward: 0.217 [0.000, 10.000], mean action: 7.988 [1.000, 8.000], mean observation: 72.878 [0.000, 228.000], loss: 1.085509, mean_absolute_error: 0.025912, acc: 0.996661, mean_q: 1.000000 412015/1000000: episode: 636, duration: 5.143s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 
8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 412646/1000000: episode: 637, duration: 5.128s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 413280/1000000: episode: 638, duration: 5.086s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 413897/1000000: episode: 639, duration: 4.975s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 414530/1000000: episode: 640, duration: 5.086s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 415149/1000000: episode: 641, duration: 4.972s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 415783/1000000: episode: 642, duration: 5.092s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 
416408/1000000: episode: 643, duration: 5.025s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 417028/1000000: episode: 644, duration: 5.065s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 417655/1000000: episode: 645, duration: 5.027s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 418296/1000000: episode: 646, duration: 5.158s, episode steps: 641, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 418925/1000000: episode: 647, duration: 5.097s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 419559/1000000: episode: 648, duration: 5.115s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 420187/1000000: episode: 649, duration: 5.061s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], 
mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 420809/1000000: episode: 650, duration: 5.039s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 421431/1000000: episode: 651, duration: 5.049s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 422059/1000000: episode: 652, duration: 5.067s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 422690/1000000: episode: 653, duration: 5.078s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 423317/1000000: episode: 654, duration: 5.038s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 423949/1000000: episode: 655, duration: 5.085s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 
1.000000 424570/1000000: episode: 656, duration: 5.004s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 425197/1000000: episode: 657, duration: 5.051s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 425829/1000000: episode: 658, duration: 5.077s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 426461/1000000: episode: 659, duration: 5.232s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 427080/1000000: episode: 660, duration: 5.114s, episode steps: 619, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000 427699/1000000: episode: 661, duration: 5.099s, episode steps: 619, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 428313/1000000: episode: 662, duration: 4.936s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 428951/1000000: episode: 663, duration: 5.102s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.550741, mean_absolute_error: 0.013987, acc: 0.996860, mean_q: 1.000000 429571/1000000: episode: 664, duration: 4.978s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.981 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.568353, mean_absolute_error: 0.014895, acc: 0.995153, mean_q: 1.000000 430187/1000000: episode: 665, duration: 4.941s, episode steps: 616, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 430823/1000000: episode: 666, duration: 5.158s, episode steps: 636, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 431454/1000000: episode: 667, duration: 5.098s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 432092/1000000: episode: 668, duration: 5.113s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.552295, mean_absolute_error: 0.014505, acc: 0.995290, 
mean_q: 1.000000 433139/1000000: episode: 669, duration: 8.543s, episode steps: 1047, steps per second: 123, episode reward: 840.000, mean reward: 0.802 [0.000, 400.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.782 [0.000, 228.000], loss: 97.698991, mean_absolute_error: 0.090714, acc: 0.998088, mean_q: 1.000000 433762/1000000: episode: 670, duration: 5.247s, episode steps: 623, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 434388/1000000: episode: 671, duration: 5.190s, episode steps: 626, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 435017/1000000: episode: 672, duration: 5.468s, episode steps: 629, steps per second: 115, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 435642/1000000: episode: 673, duration: 5.275s, episode steps: 625, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 436268/1000000: episode: 674, duration: 5.346s, episode steps: 626, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 436903/1000000: episode: 675, duration: 5.504s, episode steps: 635, steps per second: 115, episode reward: 70.000, mean reward: 
0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 437526/1000000: episode: 676, duration: 5.401s, episode steps: 623, steps per second: 115, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000 438154/1000000: episode: 677, duration: 5.075s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 438766/1000000: episode: 678, duration: 5.008s, episode steps: 612, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000 439403/1000000: episode: 679, duration: 5.423s, episode steps: 637, steps per second: 117, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 440026/1000000: episode: 680, duration: 5.211s, episode steps: 623, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 440659/1000000: episode: 681, duration: 5.212s, episode steps: 633, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, 
acc: 0.998418, mean_q: 1.000000 441296/1000000: episode: 682, duration: 5.468s, episode steps: 637, steps per second: 116, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 441922/1000000: episode: 683, duration: 5.519s, episode steps: 626, steps per second: 113, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 442555/1000000: episode: 684, duration: 5.308s, episode steps: 633, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556664, mean_absolute_error: 0.014611, acc: 0.995253, mean_q: 1.000000 443189/1000000: episode: 685, duration: 5.236s, episode steps: 634, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 443804/1000000: episode: 686, duration: 5.048s, episode steps: 615, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.881 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 444434/1000000: episode: 687, duration: 5.175s, episode steps: 630, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 445056/1000000: episode: 688, duration: 5.214s, episode steps: 622, steps per second: 119, episode reward: 70.000, mean 
reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 445690/1000000: episode: 689, duration: 5.300s, episode steps: 634, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 446309/1000000: episode: 690, duration: 5.086s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 446944/1000000: episode: 691, duration: 5.248s, episode steps: 635, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 447574/1000000: episode: 692, duration: 5.196s, episode steps: 630, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 448196/1000000: episode: 693, duration: 5.435s, episode steps: 622, steps per second: 114, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 448835/1000000: episode: 694, duration: 5.224s, episode steps: 639, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 
0.013450, acc: 0.998433, mean_q: 1.000000 449466/1000000: episode: 695, duration: 7.247s, episode steps: 631, steps per second: 87, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 450088/1000000: episode: 696, duration: 5.812s, episode steps: 622, steps per second: 107, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 450721/1000000: episode: 697, duration: 5.389s, episode steps: 633, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 451341/1000000: episode: 698, duration: 5.218s, episode steps: 620, steps per second: 119, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 451967/1000000: episode: 699, duration: 5.626s, episode steps: 626, steps per second: 111, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 452592/1000000: episode: 700, duration: 5.315s, episode steps: 625, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 453226/1000000: episode: 701, duration: 5.224s, episode steps: 634, steps per second: 121, episode reward: 
70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 453855/1000000: episode: 702, duration: 5.384s, episode steps: 629, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 454483/1000000: episode: 703, duration: 5.517s, episode steps: 628, steps per second: 114, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 455112/1000000: episode: 704, duration: 5.297s, episode steps: 629, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 455737/1000000: episode: 705, duration: 5.286s, episode steps: 625, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 456368/1000000: episode: 706, duration: 5.304s, episode steps: 631, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 456999/1000000: episode: 707, duration: 5.299s, episode steps: 631, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.555288, 
mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 457628/1000000: episode: 708, duration: 5.653s, episode steps: 629, steps per second: 111, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 458248/1000000: episode: 709, duration: 5.004s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 458874/1000000: episode: 710, duration: 5.035s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 459509/1000000: episode: 711, duration: 5.153s, episode steps: 635, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 460141/1000000: episode: 712, duration: 5.123s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 460765/1000000: episode: 713, duration: 5.023s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 461383/1000000: episode: 714, duration: 4.992s, episode steps: 618, steps per second: 
124, episode reward: 160.000, mean reward: 0.259 [0.000, 10.000], mean action: 7.974 [0.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 1.298072, mean_absolute_error: 0.031115, acc: 0.995138, mean_q: 1.000000 462007/1000000: episode: 715, duration: 5.027s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.979 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564705, mean_absolute_error: 0.014806, acc: 0.995185, mean_q: 1.000000 462639/1000000: episode: 716, duration: 5.078s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 463271/1000000: episode: 717, duration: 5.090s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 463901/1000000: episode: 718, duration: 5.140s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 464525/1000000: episode: 719, duration: 5.203s, episode steps: 624, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 465147/1000000: episode: 720, duration: 5.054s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 
0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 465763/1000000: episode: 721, duration: 4.972s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 466392/1000000: episode: 722, duration: 5.073s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 467022/1000000: episode: 723, duration: 5.072s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 467657/1000000: episode: 724, duration: 5.124s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 468275/1000000: episode: 725, duration: 4.987s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 468912/1000000: episode: 726, duration: 5.153s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 469547/1000000: episode: 727, duration: 5.132s, episode steps: 635, steps per 
second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 470175/1000000: episode: 728, duration: 5.072s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 470800/1000000: episode: 729, duration: 5.283s, episode steps: 625, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 471426/1000000: episode: 730, duration: 5.156s, episode steps: 626, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 472038/1000000: episode: 731, duration: 5.015s, episode steps: 612, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000 472668/1000000: episode: 732, duration: 5.274s, episode steps: 630, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 473295/1000000: episode: 733, duration: 5.182s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], 
loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 473928/1000000: episode: 734, duration: 5.253s, episode steps: 633, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 474554/1000000: episode: 735, duration: 5.069s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 475179/1000000: episode: 736, duration: 5.036s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563800, mean_absolute_error: 0.014785, acc: 0.995192, mean_q: 1.000000 475814/1000000: episode: 737, duration: 5.155s, episode steps: 635, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 476440/1000000: episode: 738, duration: 5.554s, episode steps: 626, steps per second: 113, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 477069/1000000: episode: 739, duration: 5.082s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 477705/1000000: episode: 740, duration: 5.114s, episode steps: 636, 
steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 478330/1000000: episode: 741, duration: 5.024s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 478961/1000000: episode: 742, duration: 5.128s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 479583/1000000: episode: 743, duration: 4.995s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 480210/1000000: episode: 744, duration: 5.103s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 480832/1000000: episode: 745, duration: 5.027s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 481451/1000000: episode: 746, duration: 5.008s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 
228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 482076/1000000: episode: 747, duration: 5.070s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 482914/1000000: episode: 748, duration: 6.783s, episode steps: 838, steps per second: 124, episode reward: 440.000, mean reward: 0.525 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.807 [0.000, 228.000], loss: 26.519873, mean_absolute_error: 0.059991, acc: 0.997611, mean_q: 1.000000 483546/1000000: episode: 749, duration: 5.091s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000 484170/1000000: episode: 750, duration: 5.080s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 484804/1000000: episode: 751, duration: 5.445s, episode steps: 634, steps per second: 116, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 485603/1000000: episode: 752, duration: 6.580s, episode steps: 799, steps per second: 121, episode reward: 240.000, mean reward: 0.300 [0.000, 50.000], mean action: 7.991 [1.000, 8.000], mean observation: 72.823 [0.000, 228.000], loss: 2.755802, mean_absolute_error: 0.035024, acc: 0.997494, mean_q: 1.000000 486227/1000000: episode: 753, duration: 5.027s, episode 
steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 486854/1000000: episode: 754, duration: 5.038s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 487472/1000000: episode: 755, duration: 4.955s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 488095/1000000: episode: 756, duration: 4.988s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 488716/1000000: episode: 757, duration: 4.971s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 489356/1000000: episode: 758, duration: 5.123s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 489983/1000000: episode: 759, duration: 4.979s, episode steps: 627, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 
72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 490613/1000000: episode: 760, duration: 5.029s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 491247/1000000: episode: 761, duration: 5.087s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 491865/1000000: episode: 762, duration: 4.961s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 492496/1000000: episode: 763, duration: 5.053s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 493135/1000000: episode: 764, duration: 5.134s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 493763/1000000: episode: 765, duration: 5.051s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 494388/1000000: episode: 766, duration: 4.985s, 
episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 495021/1000000: episode: 767, duration: 5.103s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 495635/1000000: episode: 768, duration: 4.944s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.572301, mean_absolute_error: 0.014492, acc: 0.996737, mean_q: 1.000000 496280/1000000: episode: 769, duration: 5.174s, episode steps: 645, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.543218, mean_absolute_error: 0.013335, acc: 0.998447, mean_q: 1.000000 496903/1000000: episode: 770, duration: 4.975s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000 497532/1000000: episode: 771, duration: 5.051s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 498157/1000000: episode: 772, duration: 5.030s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 498792/1000000: episode: 773, duration: 5.071s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 499418/1000000: episode: 774, duration: 5.006s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 500044/1000000: episode: 775, duration: 5.000s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 500676/1000000: episode: 776, duration: 5.045s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 501308/1000000: episode: 777, duration: 5.052s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 501941/1000000: episode: 778, duration: 5.062s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 502562/1000000: episode: 779, 
duration: 5.006s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 503187/1000000: episode: 780, duration: 4.989s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 503810/1000000: episode: 781, duration: 4.992s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 504454/1000000: episode: 782, duration: 5.189s, episode steps: 644, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.545602, mean_absolute_error: 0.013867, acc: 0.996890, mean_q: 1.000000 505080/1000000: episode: 783, duration: 5.021s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 505700/1000000: episode: 784, duration: 4.951s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 506334/1000000: episode: 785, duration: 5.073s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 
8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 506966/1000000: episode: 786, duration: 5.063s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000 507592/1000000: episode: 787, duration: 5.004s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 508215/1000000: episode: 788, duration: 4.968s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 508838/1000000: episode: 789, duration: 4.970s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 509472/1000000: episode: 790, duration: 5.045s, episode steps: 634, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 510094/1000000: episode: 791, duration: 4.990s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 510725/1000000: 
episode: 792, duration: 5.023s, episode steps: 631, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 511347/1000000: episode: 793, duration: 4.969s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 511968/1000000: episode: 794, duration: 4.963s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 512589/1000000: episode: 795, duration: 4.979s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 513225/1000000: episode: 796, duration: 5.118s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 513848/1000000: episode: 797, duration: 4.989s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 514480/1000000: episode: 798, duration: 5.061s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 
8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 515115/1000000: episode: 799, duration: 5.074s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 515757/1000000: episode: 800, duration: 5.140s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000 516385/1000000: episode: 801, duration: 5.031s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 517014/1000000: episode: 802, duration: 5.030s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 517653/1000000: episode: 803, duration: 5.102s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 518293/1000000: episode: 804, duration: 5.100s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 
518916/1000000: episode: 805, duration: 4.983s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 519544/1000000: episode: 806, duration: 5.091s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 520170/1000000: episode: 807, duration: 5.221s, episode steps: 626, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 520792/1000000: episode: 808, duration: 5.321s, episode steps: 622, steps per second: 117, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 521414/1000000: episode: 809, duration: 5.384s, episode steps: 622, steps per second: 116, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 522011/1000000: episode: 810, duration: 4.997s, episode steps: 597, steps per second: 119, episode reward: 160.000, mean reward: 0.268 [0.000, 10.000], mean action: 7.977 [0.000, 8.000], mean observation: 72.818 [0.000, 228.000], loss: 1.327197, mean_absolute_error: 0.031803, acc: 0.994966, mean_q: 1.000000 522631/1000000: episode: 811, duration: 5.024s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], 
mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 523260/1000000: episode: 812, duration: 5.026s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.542868, mean_absolute_error: 0.013822, acc: 0.996815, mean_q: 1.000000 523888/1000000: episode: 813, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 524526/1000000: episode: 814, duration: 5.091s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 525149/1000000: episode: 815, duration: 4.963s, episode steps: 623, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 525784/1000000: episode: 816, duration: 5.072s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 526406/1000000: episode: 817, duration: 4.993s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 
1.000000 527037/1000000: episode: 818, duration: 5.047s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 527666/1000000: episode: 819, duration: 5.011s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 528292/1000000: episode: 820, duration: 4.981s, episode steps: 626, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 528921/1000000: episode: 821, duration: 5.023s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 529554/1000000: episode: 822, duration: 5.044s, episode steps: 633, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 530177/1000000: episode: 823, duration: 4.966s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 530808/1000000: episode: 824, duration: 5.063s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 531439/1000000: episode: 825, duration: 5.051s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 532066/1000000: episode: 826, duration: 5.007s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 532700/1000000: episode: 827, duration: 5.126s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 533326/1000000: episode: 828, duration: 5.029s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.897 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 533961/1000000: episode: 829, duration: 5.088s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 534594/1000000: episode: 830, duration: 5.141s, episode steps: 633, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, 
mean_q: 1.000000 535225/1000000: episode: 831, duration: 5.081s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 535857/1000000: episode: 832, duration: 5.200s, episode steps: 632, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 536487/1000000: episode: 833, duration: 5.559s, episode steps: 630, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 537103/1000000: episode: 834, duration: 5.477s, episode steps: 616, steps per second: 112, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 537732/1000000: episode: 835, duration: 5.224s, episode steps: 629, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 538355/1000000: episode: 836, duration: 4.979s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 538986/1000000: episode: 837, duration: 5.085s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 
[0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 539626/1000000: episode: 838, duration: 5.121s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 540259/1000000: episode: 839, duration: 5.072s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 540902/1000000: episode: 840, duration: 5.179s, episode steps: 643, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000 541522/1000000: episode: 841, duration: 4.998s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 542162/1000000: episode: 842, duration: 5.154s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 542788/1000000: episode: 843, duration: 5.039s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 
0.998400, mean_q: 1.000000 543409/1000000: episode: 844, duration: 4.980s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 544044/1000000: episode: 845, duration: 5.097s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000 544673/1000000: episode: 846, duration: 5.049s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 545305/1000000: episode: 847, duration: 5.080s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 545935/1000000: episode: 848, duration: 5.071s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 546560/1000000: episode: 849, duration: 5.017s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 547185/1000000: episode: 850, duration: 5.012s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean 
reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 547813/1000000: episode: 851, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 548440/1000000: episode: 852, duration: 5.040s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 549075/1000000: episode: 853, duration: 5.115s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 549696/1000000: episode: 854, duration: 4.995s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 550323/1000000: episode: 855, duration: 5.043s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 550957/1000000: episode: 856, duration: 5.073s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 
0.014069, acc: 0.996840, mean_q: 1.000000 551587/1000000: episode: 857, duration: 5.051s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 552216/1000000: episode: 858, duration: 5.054s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 552853/1000000: episode: 859, duration: 5.126s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000 553479/1000000: episode: 860, duration: 5.045s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 554111/1000000: episode: 861, duration: 5.098s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 554734/1000000: episode: 862, duration: 5.015s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 555357/1000000: episode: 863, duration: 5.028s, episode steps: 623, steps per second: 124, episode reward: 
70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000 555995/1000000: episode: 864, duration: 5.119s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 556622/1000000: episode: 865, duration: 5.017s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 557244/1000000: episode: 866, duration: 4.961s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 557864/1000000: episode: 867, duration: 4.961s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 558497/1000000: episode: 868, duration: 5.068s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 559117/1000000: episode: 869, duration: 4.980s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, 
mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 559739/1000000: episode: 870, duration: 5.003s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 560372/1000000: episode: 871, duration: 5.070s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 561003/1000000: episode: 872, duration: 5.055s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 561620/1000000: episode: 873, duration: 4.927s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 562239/1000000: episode: 874, duration: 4.966s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 562921/1000000: episode: 875, duration: 5.471s, episode steps: 682, steps per second: 125, episode reward: 90.000, mean reward: 0.132 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.871 [0.000, 228.000], loss: 0.661709, mean_absolute_error: 0.016412, acc: 0.997063, mean_q: 1.000000 563547/1000000: episode: 876, duration: 5.036s, episode steps: 626, steps per second: 
124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 564177/1000000: episode: 877, duration: 5.057s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 564800/1000000: episode: 878, duration: 4.981s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000 565428/1000000: episode: 879, duration: 5.031s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 566061/1000000: episode: 880, duration: 5.101s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 566683/1000000: episode: 881, duration: 5.012s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 567314/1000000: episode: 882, duration: 5.052s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 
0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 567929/1000000: episode: 883, duration: 4.944s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 568550/1000000: episode: 884, duration: 4.993s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 569173/1000000: episode: 885, duration: 4.975s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 569801/1000000: episode: 886, duration: 5.064s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 570441/1000000: episode: 887, duration: 5.147s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 571069/1000000: episode: 888, duration: 5.033s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 571694/1000000: episode: 889, duration: 5.027s, episode steps: 625, steps per 
second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 572339/1000000: episode: 890, duration: 5.180s, episode steps: 645, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.543218, mean_absolute_error: 0.013335, acc: 0.998447, mean_q: 1.000000 572964/1000000: episode: 891, duration: 5.014s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 573589/1000000: episode: 892, duration: 5.026s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 574216/1000000: episode: 893, duration: 5.039s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 574845/1000000: episode: 894, duration: 5.036s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 575476/1000000: episode: 895, duration: 5.058s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], 
loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 576104/1000000: episode: 896, duration: 5.077s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 576729/1000000: episode: 897, duration: 5.014s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 577353/1000000: episode: 898, duration: 5.034s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000 577989/1000000: episode: 899, duration: 5.087s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 578783/1000000: episode: 900, duration: 6.370s, episode steps: 794, steps per second: 125, episode reward: 240.000, mean reward: 0.302 [0.000, 50.000], mean action: 7.991 [1.000, 8.000], mean observation: 72.820 [0.000, 228.000], loss: 2.773177, mean_absolute_error: 0.035237, acc: 0.997478, mean_q: 1.000000 579413/1000000: episode: 901, duration: 5.075s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 580041/1000000: episode: 902, duration: 5.043s, episode steps: 628, 
steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 580668/1000000: episode: 903, duration: 5.026s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 581293/1000000: episode: 904, duration: 5.026s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 581924/1000000: episode: 905, duration: 5.066s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 582557/1000000: episode: 906, duration: 5.069s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 583187/1000000: episode: 907, duration: 5.055s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 583816/1000000: episode: 908, duration: 5.055s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 
228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 584455/1000000: episode: 909, duration: 5.131s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 585088/1000000: episode: 910, duration: 5.063s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 585717/1000000: episode: 911, duration: 5.018s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 586353/1000000: episode: 912, duration: 5.094s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 586978/1000000: episode: 913, duration: 5.006s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 587599/1000000: episode: 914, duration: 4.993s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 588221/1000000: episode: 915, duration: 4.960s, episode steps: 
622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 588845/1000000: episode: 916, duration: 4.983s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 589476/1000000: episode: 917, duration: 5.070s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 590106/1000000: episode: 918, duration: 5.039s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 590730/1000000: episode: 919, duration: 5.006s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 591352/1000000: episode: 920, duration: 5.023s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 591988/1000000: episode: 921, duration: 5.131s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 
[0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 592623/1000000: episode: 922, duration: 5.090s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 593248/1000000: episode: 923, duration: 5.038s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 593884/1000000: episode: 924, duration: 5.114s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 594510/1000000: episode: 925, duration: 5.040s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 595126/1000000: episode: 926, duration: 4.963s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 595744/1000000: episode: 927, duration: 4.965s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 596380/1000000: episode: 928, duration: 5.090s, 
episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 596992/1000000: episode: 929, duration: 4.934s, episode steps: 612, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000 597614/1000000: episode: 930, duration: 4.983s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 598238/1000000: episode: 931, duration: 5.041s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 598860/1000000: episode: 932, duration: 5.011s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 599495/1000000: episode: 933, duration: 5.073s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 600115/1000000: episode: 934, duration: 4.951s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.886 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 600736/1000000: episode: 935, duration: 4.978s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 601366/1000000: episode: 936, duration: 5.066s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 601996/1000000: episode: 937, duration: 5.046s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 602628/1000000: episode: 938, duration: 5.067s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 603242/1000000: episode: 939, duration: 4.923s, episode steps: 614, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 603853/1000000: episode: 940, duration: 4.923s, episode steps: 611, steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000 604474/1000000: episode: 941, 
duration: 5.024s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 605108/1000000: episode: 942, duration: 5.055s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 605732/1000000: episode: 943, duration: 5.019s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 606360/1000000: episode: 944, duration: 5.042s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 606989/1000000: episode: 945, duration: 5.036s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 607612/1000000: episode: 946, duration: 5.035s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 608232/1000000: episode: 947, duration: 4.964s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 
8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 608860/1000000: episode: 948, duration: 5.058s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 609503/1000000: episode: 949, duration: 5.173s, episode steps: 643, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000 610128/1000000: episode: 950, duration: 4.999s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 610751/1000000: episode: 951, duration: 4.992s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 611381/1000000: episode: 952, duration: 5.620s, episode steps: 630, steps per second: 112, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 612012/1000000: episode: 953, duration: 5.025s, episode steps: 631, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 612648/1000000: 
episode: 954, duration: 5.091s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000 613279/1000000: episode: 955, duration: 5.077s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 613910/1000000: episode: 956, duration: 5.078s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 614534/1000000: episode: 957, duration: 5.026s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 615171/1000000: episode: 958, duration: 5.092s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 615803/1000000: episode: 959, duration: 5.046s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 616422/1000000: episode: 960, duration: 4.958s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 
8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 617050/1000000: episode: 961, duration: 5.031s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 617674/1000000: episode: 962, duration: 4.975s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 618295/1000000: episode: 963, duration: 4.955s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 618917/1000000: episode: 964, duration: 4.962s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 619550/1000000: episode: 965, duration: 5.047s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 620184/1000000: episode: 966, duration: 5.058s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 
620805/1000000: episode: 967, duration: 4.983s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 621437/1000000: episode: 968, duration: 5.095s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 622068/1000000: episode: 969, duration: 5.060s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 622688/1000000: episode: 970, duration: 4.977s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000 623296/1000000: episode: 971, duration: 4.917s, episode steps: 608, steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.576327, mean_absolute_error: 0.014080, acc: 0.998353, mean_q: 1.000000 623911/1000000: episode: 972, duration: 4.918s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 624535/1000000: episode: 973, duration: 4.987s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], 
mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 625159/1000000: episode: 974, duration: 4.969s, episode steps: 624, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 625788/1000000: episode: 975, duration: 5.061s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 626409/1000000: episode: 976, duration: 4.973s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 627033/1000000: episode: 977, duration: 5.020s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 627659/1000000: episode: 978, duration: 5.027s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 628286/1000000: episode: 979, duration: 5.036s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 
1.000000 628901/1000000: episode: 980, duration: 4.938s, episode steps: 615, steps per second: 125, episode reward: 80.000, mean reward: 0.130 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.898 [0.000, 228.000], loss: 0.652639, mean_absolute_error: 0.016276, acc: 0.996743, mean_q: 1.000000 629535/1000000: episode: 981, duration: 5.091s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 630171/1000000: episode: 982, duration: 5.070s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 630799/1000000: episode: 983, duration: 5.006s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 631423/1000000: episode: 984, duration: 5.001s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 632059/1000000: episode: 985, duration: 5.085s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.536884, mean_absolute_error: 0.013682, acc: 0.996850, mean_q: 1.000000 632687/1000000: episode: 986, duration: 5.015s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 633311/1000000: episode: 987, duration: 4.984s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 633941/1000000: episode: 988, duration: 5.064s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 634565/1000000: episode: 989, duration: 4.992s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 635193/1000000: episode: 990, duration: 5.033s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 635824/1000000: episode: 991, duration: 5.062s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [3.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000 636452/1000000: episode: 992, duration: 5.045s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, 
mean_q: 1.000000 637076/1000000: episode: 993, duration: 4.986s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 637710/1000000: episode: 994, duration: 5.117s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 638335/1000000: episode: 995, duration: 4.996s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 638950/1000000: episode: 996, duration: 4.954s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 639594/1000000: episode: 997, duration: 5.165s, episode steps: 644, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.544062, mean_absolute_error: 0.013354, acc: 0.998445, mean_q: 1.000000 640222/1000000: episode: 998, duration: 5.001s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 640857/1000000: episode: 999, duration: 5.105s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 
[0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 641490/1000000: episode: 1000, duration: 5.081s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 642109/1000000: episode: 1001, duration: 4.969s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 642732/1000000: episode: 1002, duration: 4.976s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 643368/1000000: episode: 1003, duration: 5.065s, episode steps: 636, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 643995/1000000: episode: 1004, duration: 5.012s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 644639/1000000: episode: 1005, duration: 5.131s, episode steps: 644, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.988 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.545602, mean_absolute_error: 0.013867, 
acc: 0.996890, mean_q: 1.000000 645276/1000000: episode: 1006, duration: 5.072s, episode steps: 637, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 645912/1000000: episode: 1007, duration: 5.078s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 646550/1000000: episode: 1008, duration: 5.104s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 647171/1000000: episode: 1009, duration: 4.977s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 647809/1000000: episode: 1010, duration: 5.121s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 648447/1000000: episode: 1011, duration: 5.089s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 649077/1000000: episode: 1012, duration: 5.021s, episode steps: 630, steps per second: 125, episode reward: 
70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 649716/1000000: episode: 1013, duration: 5.137s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.549877, mean_absolute_error: 0.013967, acc: 0.996865, mean_q: 1.000000 650342/1000000: episode: 1014, duration: 5.002s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 650967/1000000: episode: 1015, duration: 4.995s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 651588/1000000: episode: 1016, duration: 4.967s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 652214/1000000: episode: 1017, duration: 4.989s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 652849/1000000: episode: 1018, duration: 5.063s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.551785, 
mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 653469/1000000: episode: 1019, duration: 4.966s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 654102/1000000: episode: 1020, duration: 5.043s, episode steps: 633, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 654728/1000000: episode: 1021, duration: 5.014s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000 655349/1000000: episode: 1022, duration: 4.973s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 655978/1000000: episode: 1023, duration: 5.025s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 656594/1000000: episode: 1024, duration: 4.957s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 657220/1000000: episode: 1025, duration: 4.992s, episode steps: 626, steps per 
second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 657844/1000000: episode: 1026, duration: 4.997s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 658479/1000000: episode: 1027, duration: 5.116s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000 659111/1000000: episode: 1028, duration: 5.056s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 659740/1000000: episode: 1029, duration: 5.034s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 660359/1000000: episode: 1030, duration: 4.975s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000 660986/1000000: episode: 1031, duration: 5.027s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.887 [0.000, 
228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 661620/1000000: episode: 1032, duration: 5.053s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 662250/1000000: episode: 1033, duration: 5.028s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 662869/1000000: episode: 1034, duration: 4.966s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 663488/1000000: episode: 1035, duration: 4.949s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 664113/1000000: episode: 1036, duration: 5.011s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 664734/1000000: episode: 1037, duration: 4.999s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 665358/1000000: episode: 1038, duration: 5.013s, episode 
steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 665982/1000000: episode: 1039, duration: 5.004s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 666622/1000000: episode: 1040, duration: 5.179s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 667247/1000000: episode: 1041, duration: 5.026s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 667874/1000000: episode: 1042, duration: 5.063s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 668495/1000000: episode: 1043, duration: 5.010s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 669120/1000000: episode: 1044, duration: 5.028s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 669741/1000000: episode: 1045, duration: 4.991s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 670371/1000000: episode: 1046, duration: 5.040s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 670988/1000000: episode: 1047, duration: 4.963s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 671609/1000000: episode: 1048, duration: 4.985s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 672232/1000000: episode: 1049, duration: 4.990s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 672858/1000000: episode: 1050, duration: 5.005s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 673492/1000000: episode: 
1051, duration: 5.092s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 674137/1000000: episode: 1052, duration: 5.148s, episode steps: 645, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.543218, mean_absolute_error: 0.013335, acc: 0.998447, mean_q: 1.000000 674949/1000000: episode: 1053, duration: 6.523s, episode steps: 812, steps per second: 124, episode reward: 440.000, mean reward: 0.542 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.797 [0.000, 228.000], loss: 27.370077, mean_absolute_error: 0.061878, acc: 0.997534, mean_q: 1.000000 675569/1000000: episode: 1054, duration: 4.960s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 676194/1000000: episode: 1055, duration: 4.994s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 676818/1000000: episode: 1056, duration: 5.039s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 677443/1000000: episode: 1057, duration: 4.990s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 
8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 678069/1000000: episode: 1058, duration: 5.033s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 678699/1000000: episode: 1059, duration: 5.059s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 679331/1000000: episode: 1060, duration: 5.027s, episode steps: 632, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 679954/1000000: episode: 1061, duration: 4.979s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 680587/1000000: episode: 1062, duration: 5.067s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 681219/1000000: episode: 1063, duration: 5.054s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 
681836/1000000: episode: 1064, duration: 4.948s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 682465/1000000: episode: 1065, duration: 5.052s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 683091/1000000: episode: 1066, duration: 5.011s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 683713/1000000: episode: 1067, duration: 4.989s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 684338/1000000: episode: 1068, duration: 4.988s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 684962/1000000: episode: 1069, duration: 5.025s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 685598/1000000: episode: 1070, duration: 5.106s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 686223/1000000: episode: 1071, duration: 5.054s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 686860/1000000: episode: 1072, duration: 5.114s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 687479/1000000: episode: 1073, duration: 4.976s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 688101/1000000: episode: 1074, duration: 5.006s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 688740/1000000: episode: 1075, duration: 5.107s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 689373/1000000: episode: 1076, duration: 5.084s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 
0.996835, mean_q: 1.000000 690000/1000000: episode: 1077, duration: 5.026s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 690628/1000000: episode: 1078, duration: 5.013s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 691250/1000000: episode: 1079, duration: 4.983s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 691875/1000000: episode: 1080, duration: 5.027s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 692507/1000000: episode: 1081, duration: 5.074s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 693135/1000000: episode: 1082, duration: 5.048s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 693763/1000000: episode: 1083, duration: 5.067s, episode steps: 628, steps per second: 124, episode reward: 70.000, 
mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 694378/1000000: episode: 1084, duration: 4.939s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 695018/1000000: episode: 1085, duration: 5.128s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 695646/1000000: episode: 1086, duration: 5.019s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 696281/1000000: episode: 1087, duration: 5.113s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 696915/1000000: episode: 1088, duration: 5.114s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 697532/1000000: episode: 1089, duration: 4.957s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.567907, 
mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 698159/1000000: episode: 1090, duration: 5.036s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 698784/1000000: episode: 1091, duration: 5.029s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 699406/1000000: episode: 1092, duration: 5.009s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 700033/1000000: episode: 1093, duration: 5.026s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 700655/1000000: episode: 1094, duration: 5.002s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 701289/1000000: episode: 1095, duration: 5.078s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 701918/1000000: episode: 1096, duration: 5.048s, episode steps: 629, steps per 
second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 702559/1000000: episode: 1097, duration: 5.135s, episode steps: 641, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 703189/1000000: episode: 1098, duration: 5.025s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 703817/1000000: episode: 1099, duration: 5.010s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 704457/1000000: episode: 1100, duration: 5.113s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 705078/1000000: episode: 1101, duration: 4.957s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 705703/1000000: episode: 1102, duration: 5.016s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 
228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 706342/1000000: episode: 1103, duration: 5.127s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 706957/1000000: episode: 1104, duration: 4.908s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 707573/1000000: episode: 1105, duration: 4.949s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 708187/1000000: episode: 1106, duration: 4.925s, episode steps: 614, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 708810/1000000: episode: 1107, duration: 4.985s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 709440/1000000: episode: 1108, duration: 5.094s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 710051/1000000: episode: 1109, duration: 4.879s, episode 
steps: 611, steps per second: 125, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000 710684/1000000: episode: 1110, duration: 5.085s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 711315/1000000: episode: 1111, duration: 5.095s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 711937/1000000: episode: 1112, duration: 4.971s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 712555/1000000: episode: 1113, duration: 4.966s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 713194/1000000: episode: 1114, duration: 5.107s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 713820/1000000: episode: 1115, duration: 5.005s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 714455/1000000: episode: 1116, duration: 5.123s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 715085/1000000: episode: 1117, duration: 5.022s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 715714/1000000: episode: 1118, duration: 5.035s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 716355/1000000: episode: 1119, duration: 5.134s, episode steps: 641, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 716978/1000000: episode: 1120, duration: 5.009s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000 717613/1000000: episode: 1121, duration: 5.122s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 718248/1000000: episode: 
1122, duration: 5.107s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 718883/1000000: episode: 1123, duration: 5.130s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 719503/1000000: episode: 1124, duration: 4.982s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 720131/1000000: episode: 1125, duration: 5.044s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 720755/1000000: episode: 1126, duration: 5.006s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 721379/1000000: episode: 1127, duration: 4.996s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 722005/1000000: episode: 1128, duration: 5.011s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 
[8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 722630/1000000: episode: 1129, duration: 5.001s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 723239/1000000: episode: 1130, duration: 4.874s, episode steps: 609, steps per second: 125, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.575379, mean_absolute_error: 0.014059, acc: 0.998355, mean_q: 1.000000 723879/1000000: episode: 1131, duration: 5.103s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 724504/1000000: episode: 1132, duration: 5.008s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 725126/1000000: episode: 1133, duration: 4.992s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 725755/1000000: episode: 1134, duration: 5.058s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 
726376/1000000: episode: 1135, duration: 4.989s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 727005/1000000: episode: 1136, duration: 5.060s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 727624/1000000: episode: 1137, duration: 4.941s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 728251/1000000: episode: 1138, duration: 5.058s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 728884/1000000: episode: 1139, duration: 5.041s, episode steps: 633, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 729507/1000000: episode: 1140, duration: 5.015s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 730132/1000000: episode: 1141, duration: 4.998s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 730763/1000000: episode: 1142, duration: 5.061s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 731395/1000000: episode: 1143, duration: 5.076s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 732021/1000000: episode: 1144, duration: 4.994s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 732644/1000000: episode: 1145, duration: 4.988s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 733265/1000000: episode: 1146, duration: 4.955s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 733887/1000000: episode: 1147, duration: 4.952s, episode steps: 622, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 
0.998390, mean_q: 1.000000 734493/1000000: episode: 1148, duration: 4.828s, episode steps: 606, steps per second: 126, episode reward: 70.000, mean reward: 0.116 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.579868, mean_absolute_error: 0.014669, acc: 0.996694, mean_q: 1.000000 735116/1000000: episode: 1149, duration: 4.972s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 735758/1000000: episode: 1150, duration: 5.120s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000 736382/1000000: episode: 1151, duration: 4.987s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 737017/1000000: episode: 1152, duration: 5.076s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000 737635/1000000: episode: 1153, duration: 4.942s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 738257/1000000: episode: 1154, duration: 4.946s, episode steps: 622, steps per second: 126, episode reward: 70.000, 
mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 738886/1000000: episode: 1155, duration: 5.050s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 739516/1000000: episode: 1156, duration: 5.036s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 740148/1000000: episode: 1157, duration: 5.051s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 740759/1000000: episode: 1158, duration: 4.870s, episode steps: 611, steps per second: 125, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000 741388/1000000: episode: 1159, duration: 5.011s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 742012/1000000: episode: 1160, duration: 4.999s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.561527, 
mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 742635/1000000: episode: 1161, duration: 4.973s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000 743272/1000000: episode: 1162, duration: 5.071s, episode steps: 637, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 743909/1000000: episode: 1163, duration: 5.088s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 744537/1000000: episode: 1164, duration: 4.997s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 745167/1000000: episode: 1165, duration: 5.020s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 745792/1000000: episode: 1166, duration: 4.989s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 746412/1000000: episode: 1167, duration: 4.938s, episode steps: 620, steps per 
second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 747033/1000000: episode: 1168, duration: 4.943s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 747656/1000000: episode: 1169, duration: 4.981s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 748290/1000000: episode: 1170, duration: 5.036s, episode steps: 634, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 748925/1000000: episode: 1171, duration: 5.094s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 749548/1000000: episode: 1172, duration: 5.009s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 750174/1000000: episode: 1173, duration: 5.003s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 
228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 750798/1000000: episode: 1174, duration: 4.993s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 751440/1000000: episode: 1175, duration: 5.129s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000 752077/1000000: episode: 1176, duration: 5.114s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 752699/1000000: episode: 1177, duration: 4.975s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 753316/1000000: episode: 1178, duration: 4.935s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 753944/1000000: episode: 1179, duration: 5.035s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 754573/1000000: episode: 1180, duration: 5.035s, episode 
steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 755200/1000000: episode: 1181, duration: 4.999s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 755815/1000000: episode: 1182, duration: 4.931s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 756444/1000000: episode: 1183, duration: 5.046s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 757069/1000000: episode: 1184, duration: 4.997s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 757691/1000000: episode: 1185, duration: 4.979s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 758318/1000000: episode: 1186, duration: 5.001s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean 
observation: 72.889 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 758932/1000000: episode: 1187, duration: 4.940s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.881 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 759553/1000000: episode: 1188, duration: 4.984s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 760178/1000000: episode: 1189, duration: 5.025s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 760806/1000000: episode: 1190, duration: 5.051s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 761430/1000000: episode: 1191, duration: 5.019s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 762059/1000000: episode: 1192, duration: 5.084s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 762684/1000000: episode: 
1193, duration: 5.018s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 763316/1000000: episode: 1194, duration: 5.077s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 763951/1000000: episode: 1195, duration: 5.106s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 764573/1000000: episode: 1196, duration: 4.962s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 765199/1000000: episode: 1197, duration: 5.014s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 765837/1000000: episode: 1198, duration: 5.137s, episode steps: 638, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 766457/1000000: episode: 1199, duration: 4.949s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 
[1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000 767082/1000000: episode: 1200, duration: 4.973s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 767708/1000000: episode: 1201, duration: 4.977s, episode steps: 626, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 768338/1000000: episode: 1202, duration: 5.005s, episode steps: 630, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 768960/1000000: episode: 1203, duration: 4.989s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 769584/1000000: episode: 1204, duration: 5.016s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 770209/1000000: episode: 1205, duration: 5.003s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 
770838/1000000: episode: 1206, duration: 5.026s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 771466/1000000: episode: 1207, duration: 10.561s, episode steps: 628, steps per second: 59, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 772090/1000000: episode: 1208, duration: 7.410s, episode steps: 624, steps per second: 84, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 772718/1000000: episode: 1209, duration: 18.837s, episode steps: 628, steps per second: 33, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 773335/1000000: episode: 1210, duration: 18.701s, episode steps: 617, steps per second: 33, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 773954/1000000: episode: 1211, duration: 15.716s, episode steps: 619, steps per second: 39, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000 774581/1000000: episode: 1212, duration: 13.118s, episode steps: 627, steps per second: 48, episode reward: 70.000, mean reward: 0.112 [0.000, 
10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 775217/1000000: episode: 1213, duration: 13.156s, episode steps: 636, steps per second: 48, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 775842/1000000: episode: 1214, duration: 13.056s, episode steps: 625, steps per second: 48, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 776472/1000000: episode: 1215, duration: 14.213s, episode steps: 630, steps per second: 44, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 777092/1000000: episode: 1216, duration: 8.248s, episode steps: 620, steps per second: 75, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 777725/1000000: episode: 1217, duration: 9.787s, episode steps: 633, steps per second: 65, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000 778354/1000000: episode: 1218, duration: 18.938s, episode steps: 629, steps per second: 33, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 
0.998408, mean_q: 1.000000 778985/1000000: episode: 1219, duration: 18.334s, episode steps: 631, steps per second: 34, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 779615/1000000: episode: 1220, duration: 5.101s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 780240/1000000: episode: 1221, duration: 11.245s, episode steps: 625, steps per second: 56, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 780873/1000000: episode: 1222, duration: 19.039s, episode steps: 633, steps per second: 33, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 781496/1000000: episode: 1223, duration: 16.403s, episode steps: 623, steps per second: 38, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 782122/1000000: episode: 1224, duration: 5.365s, episode steps: 626, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 782755/1000000: episode: 1225, duration: 5.567s, episode steps: 633, steps per second: 114, episode reward: 70.000, 
mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 783378/1000000: episode: 1226, duration: 5.556s, episode steps: 623, steps per second: 112, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 784015/1000000: episode: 1227, duration: 5.116s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 784643/1000000: episode: 1228, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 785270/1000000: episode: 1229, duration: 5.104s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 785890/1000000: episode: 1230, duration: 5.019s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 786522/1000000: episode: 1231, duration: 5.076s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.554408, 
mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 787140/1000000: episode: 1232, duration: 4.983s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 787759/1000000: episode: 1233, duration: 4.996s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 788383/1000000: episode: 1234, duration: 5.010s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 789016/1000000: episode: 1235, duration: 5.109s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 789644/1000000: episode: 1236, duration: 5.024s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 790276/1000000: episode: 1237, duration: 5.041s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 790914/1000000: episode: 1238, duration: 5.111s, episode steps: 638, steps per 
second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 791551/1000000: episode: 1239, duration: 5.124s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 792185/1000000: episode: 1240, duration: 5.071s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 792811/1000000: episode: 1241, duration: 5.042s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 793441/1000000: episode: 1242, duration: 5.035s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 794072/1000000: episode: 1243, duration: 5.097s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 794703/1000000: episode: 1244, duration: 5.041s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.887 [0.000, 
228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 795335/1000000: episode: 1245, duration: 5.059s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 795959/1000000: episode: 1246, duration: 4.999s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 796584/1000000: episode: 1247, duration: 5.022s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 797214/1000000: episode: 1248, duration: 5.036s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 797824/1000000: episode: 1249, duration: 4.923s, episode steps: 610, steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.574434, mean_absolute_error: 0.014038, acc: 0.998358, mean_q: 1.000000 798454/1000000: episode: 1250, duration: 5.066s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 799084/1000000: episode: 1251, duration: 5.035s, episode 
steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 799712/1000000: episode: 1252, duration: 5.059s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 800338/1000000: episode: 1253, duration: 5.021s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 800954/1000000: episode: 1254, duration: 4.950s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000 801573/1000000: episode: 1255, duration: 4.976s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 802194/1000000: episode: 1256, duration: 4.943s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 802817/1000000: episode: 1257, duration: 4.992s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 803449/1000000: episode: 1258, duration: 5.041s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 804079/1000000: episode: 1259, duration: 5.048s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 804711/1000000: episode: 1260, duration: 5.040s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 805334/1000000: episode: 1261, duration: 4.981s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 805960/1000000: episode: 1262, duration: 5.004s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 806591/1000000: episode: 1263, duration: 5.039s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 807219/1000000: episode: 
1264, duration: 5.020s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 807841/1000000: episode: 1265, duration: 4.996s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 808465/1000000: episode: 1266, duration: 4.988s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 809077/1000000: episode: 1267, duration: 4.905s, episode steps: 612, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000 809711/1000000: episode: 1268, duration: 5.065s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 810340/1000000: episode: 1269, duration: 5.044s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 810971/1000000: episode: 1270, duration: 5.041s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 
[8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 811606/1000000: episode: 1271, duration: 5.086s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 812244/1000000: episode: 1272, duration: 5.397s, episode steps: 638, steps per second: 118, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 812863/1000000: episode: 1273, duration: 5.089s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 813493/1000000: episode: 1274, duration: 5.050s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 814124/1000000: episode: 1275, duration: 5.076s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.572732, mean_absolute_error: 0.014134, acc: 0.996825, mean_q: 1.000000 814747/1000000: episode: 1276, duration: 4.996s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 
815364/1000000: episode: 1277, duration: 4.961s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000 815988/1000000: episode: 1278, duration: 4.998s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 816618/1000000: episode: 1279, duration: 5.080s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 817250/1000000: episode: 1280, duration: 5.058s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 817863/1000000: episode: 1281, duration: 4.897s, episode steps: 613, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000 818484/1000000: episode: 1282, duration: 4.960s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 819113/1000000: episode: 1283, duration: 5.048s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 819746/1000000: episode: 1284, duration: 5.073s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 820365/1000000: episode: 1285, duration: 4.967s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 820989/1000000: episode: 1286, duration: 5.030s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 821603/1000000: episode: 1287, duration: 4.936s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 822220/1000000: episode: 1288, duration: 4.923s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000 822839/1000000: episode: 1289, duration: 5.028s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 
0.998382, mean_q: 1.000000 823471/1000000: episode: 1290, duration: 5.103s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 824108/1000000: episode: 1291, duration: 5.098s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 824729/1000000: episode: 1292, duration: 4.983s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 825348/1000000: episode: 1293, duration: 4.937s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 825972/1000000: episode: 1294, duration: 5.000s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 826596/1000000: episode: 1295, duration: 4.988s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 827223/1000000: episode: 1296, duration: 5.040s, episode steps: 627, steps per second: 124, episode reward: 70.000, 
mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 827845/1000000: episode: 1297, duration: 4.984s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 828480/1000000: episode: 1298, duration: 5.125s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 829114/1000000: episode: 1299, duration: 5.066s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 829749/1000000: episode: 1300, duration: 5.105s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 830378/1000000: episode: 1301, duration: 5.002s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 831016/1000000: episode: 1302, duration: 5.073s, episode steps: 638, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.549186, 
mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 831638/1000000: episode: 1303, duration: 4.965s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 832267/1000000: episode: 1304, duration: 5.010s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 832899/1000000: episode: 1305, duration: 5.025s, episode steps: 632, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 833540/1000000: episode: 1306, duration: 5.166s, episode steps: 641, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 834172/1000000: episode: 1307, duration: 5.047s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 834799/1000000: episode: 1308, duration: 5.030s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 835440/1000000: episode: 1309, duration: 5.091s, episode steps: 641, steps per 
second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 836068/1000000: episode: 1310, duration: 5.007s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 836695/1000000: episode: 1311, duration: 5.019s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 837323/1000000: episode: 1312, duration: 5.011s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 837949/1000000: episode: 1313, duration: 4.996s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 838576/1000000: episode: 1314, duration: 5.012s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 839204/1000000: episode: 1315, duration: 5.027s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 
228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 839836/1000000: episode: 1316, duration: 5.036s, episode steps: 632, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 840459/1000000: episode: 1317, duration: 4.958s, episode steps: 623, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 841088/1000000: episode: 1318, duration: 4.997s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 841705/1000000: episode: 1319, duration: 4.919s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 842341/1000000: episode: 1320, duration: 5.055s, episode steps: 636, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 842979/1000000: episode: 1321, duration: 5.075s, episode steps: 638, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.986 [2.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.552295, mean_absolute_error: 0.014505, acc: 0.995290, mean_q: 1.000000 843604/1000000: episode: 1322, duration: 4.994s, episode 
steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 844232/1000000: episode: 1323, duration: 5.010s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 844868/1000000: episode: 1324, duration: 5.056s, episode steps: 636, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 845492/1000000: episode: 1325, duration: 5.029s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 846124/1000000: episode: 1326, duration: 5.048s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 846753/1000000: episode: 1327, duration: 5.005s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 847385/1000000: episode: 1328, duration: 5.043s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 848015/1000000: episode: 1329, duration: 5.041s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 848638/1000000: episode: 1330, duration: 4.965s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 849259/1000000: episode: 1331, duration: 4.940s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 849892/1000000: episode: 1332, duration: 5.047s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 850503/1000000: episode: 1333, duration: 4.853s, episode steps: 611, steps per second: 126, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000 851128/1000000: episode: 1334, duration: 4.979s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 851757/1000000: episode: 
1335, duration: 5.021s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 852393/1000000: episode: 1336, duration: 5.068s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 853019/1000000: episode: 1337, duration: 5.005s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 853647/1000000: episode: 1338, duration: 5.060s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 854268/1000000: episode: 1339, duration: 4.992s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 854889/1000000: episode: 1340, duration: 4.947s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 855515/1000000: episode: 1341, duration: 4.991s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 
[8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 856145/1000000: episode: 1342, duration: 5.031s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 856779/1000000: episode: 1343, duration: 5.085s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 857404/1000000: episode: 1344, duration: 5.002s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000 858026/1000000: episode: 1345, duration: 4.987s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 858654/1000000: episode: 1346, duration: 5.018s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 859269/1000000: episode: 1347, duration: 4.921s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 
859883/1000000: episode: 1348, duration: 4.911s, episode steps: 614, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 860508/1000000: episode: 1349, duration: 4.996s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 861144/1000000: episode: 1350, duration: 5.068s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 861773/1000000: episode: 1351, duration: 5.021s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 862414/1000000: episode: 1352, duration: 5.109s, episode steps: 641, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 863035/1000000: episode: 1353, duration: 4.971s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 863661/1000000: episode: 1354, duration: 4.982s, episode steps: 626, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 864286/1000000: episode: 1355, duration: 4.999s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 864917/1000000: episode: 1356, duration: 5.035s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 865537/1000000: episode: 1357, duration: 5.037s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 866164/1000000: episode: 1358, duration: 5.046s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 866794/1000000: episode: 1359, duration: 5.041s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 867423/1000000: episode: 1360, duration: 5.035s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 
0.998408, mean_q: 1.000000 868051/1000000: episode: 1361, duration: 5.001s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 868686/1000000: episode: 1362, duration: 5.080s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 869323/1000000: episode: 1363, duration: 5.110s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 869948/1000000: episode: 1364, duration: 4.978s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 870569/1000000: episode: 1365, duration: 4.954s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 871198/1000000: episode: 1366, duration: 5.022s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560209, mean_absolute_error: 0.014697, acc: 0.995223, mean_q: 1.000000 871829/1000000: episode: 1367, duration: 5.055s, episode steps: 631, steps per second: 125, episode reward: 70.000, 
mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 872455/1000000: episode: 1368, duration: 5.007s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 873084/1000000: episode: 1369, duration: 5.023s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 873711/1000000: episode: 1370, duration: 5.000s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 874347/1000000: episode: 1371, duration: 5.074s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 874971/1000000: episode: 1372, duration: 4.968s, episode steps: 624, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 875595/1000000: episode: 1373, duration: 4.968s, episode steps: 624, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, 
mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 876233/1000000: episode: 1374, duration: 5.097s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 876856/1000000: episode: 1375, duration: 4.969s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 877491/1000000: episode: 1376, duration: 5.064s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 878111/1000000: episode: 1377, duration: 4.951s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 878750/1000000: episode: 1378, duration: 5.080s, episode steps: 639, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 879369/1000000: episode: 1379, duration: 4.942s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 880000/1000000: episode: 1380, duration: 5.031s, episode steps: 631, steps per 
second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 880624/1000000: episode: 1381, duration: 5.004s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 881260/1000000: episode: 1382, duration: 5.058s, episode steps: 636, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 881901/1000000: episode: 1383, duration: 5.073s, episode steps: 641, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 882525/1000000: episode: 1384, duration: 5.003s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 883438/1000000: episode: 1385, duration: 7.262s, episode steps: 913, steps per second: 126, episode reward: 840.000, mean reward: 0.920 [0.000, 400.000], mean action: 7.992 [1.000, 8.000], mean observation: 72.786 [0.000, 228.000], loss: 112.053879, mean_absolute_error: 0.103879, acc: 0.997807, mean_q: 1.000000 884059/1000000: episode: 1386, duration: 4.934s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 
228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 884687/1000000: episode: 1387, duration: 5.032s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 885317/1000000: episode: 1388, duration: 5.021s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 885947/1000000: episode: 1389, duration: 5.020s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 886580/1000000: episode: 1390, duration: 5.070s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 887202/1000000: episode: 1391, duration: 5.495s, episode steps: 622, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000 887825/1000000: episode: 1392, duration: 4.980s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 888458/1000000: episode: 1393, duration: 5.028s, episode 
steps: 633, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 889097/1000000: episode: 1394, duration: 5.060s, episode steps: 639, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 890126/1000000: episode: 1395, duration: 8.212s, episode steps: 1029, steps per second: 125, episode reward: 440.000, mean reward: 0.428 [0.000, 200.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.770 [0.000, 228.000], loss: 21.592552, mean_absolute_error: 0.049051, acc: 0.998054, mean_q: 1.000000 890748/1000000: episode: 1396, duration: 4.987s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 891387/1000000: episode: 1397, duration: 5.105s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 892032/1000000: episode: 1398, duration: 5.147s, episode steps: 645, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.543218, mean_absolute_error: 0.013335, acc: 0.998447, mean_q: 1.000000 892661/1000000: episode: 1399, duration: 4.993s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean 
observation: 72.886 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 893282/1000000: episode: 1400, duration: 4.966s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 893906/1000000: episode: 1401, duration: 4.983s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 894537/1000000: episode: 1402, duration: 5.028s, episode steps: 631, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 895161/1000000: episode: 1403, duration: 5.001s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 895786/1000000: episode: 1404, duration: 4.993s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 896418/1000000: episode: 1405, duration: 5.059s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 897047/1000000: episode: 
1406, duration: 5.022s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 897677/1000000: episode: 1407, duration: 5.045s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 898307/1000000: episode: 1408, duration: 5.084s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 898944/1000000: episode: 1409, duration: 5.097s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000 899564/1000000: episode: 1410, duration: 4.985s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 900197/1000000: episode: 1411, duration: 5.603s, episode steps: 633, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 900828/1000000: episode: 1412, duration: 5.294s, episode steps: 631, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 
[8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 901464/1000000: episode: 1413, duration: 5.259s, episode steps: 636, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 902100/1000000: episode: 1414, duration: 5.287s, episode steps: 636, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000 902722/1000000: episode: 1415, duration: 5.427s, episode steps: 622, steps per second: 115, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 903344/1000000: episode: 1416, duration: 5.294s, episode steps: 622, steps per second: 117, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 903972/1000000: episode: 1417, duration: 5.573s, episode steps: 628, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 904599/1000000: episode: 1418, duration: 5.044s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 
905228/1000000: episode: 1419, duration: 5.013s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 905864/1000000: episode: 1420, duration: 5.101s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000 906491/1000000: episode: 1421, duration: 5.031s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 907119/1000000: episode: 1422, duration: 8.164s, episode steps: 628, steps per second: 77, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 907751/1000000: episode: 1423, duration: 5.771s, episode steps: 632, steps per second: 110, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 908375/1000000: episode: 1424, duration: 42.594s, episode steps: 624, steps per second: 15, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 909009/1000000: episode: 1425, duration: 8.766s, episode steps: 634, steps per second: 72, episode reward: 70.000, mean reward: 0.110 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 909645/1000000: episode: 1426, duration: 5.533s, episode steps: 636, steps per second: 115, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 910273/1000000: episode: 1427, duration: 5.192s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 910885/1000000: episode: 1428, duration: 4.955s, episode steps: 612, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000 911506/1000000: episode: 1429, duration: 5.088s, episode steps: 621, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 912142/1000000: episode: 1430, duration: 5.168s, episode steps: 636, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 912771/1000000: episode: 1431, duration: 5.651s, episode steps: 629, steps per second: 111, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 
0.998408, mean_q: 1.000000 913394/1000000: episode: 1432, duration: 5.878s, episode steps: 623, steps per second: 106, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 914013/1000000: episode: 1433, duration: 5.492s, episode steps: 619, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 914626/1000000: episode: 1434, duration: 4.973s, episode steps: 613, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000 915244/1000000: episode: 1435, duration: 5.005s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 915859/1000000: episode: 1436, duration: 5.006s, episode steps: 615, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 916482/1000000: episode: 1437, duration: 5.262s, episode steps: 623, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 917121/1000000: episode: 1438, duration: 5.430s, episode steps: 639, steps per second: 118, episode reward: 70.000, 
mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549877, mean_absolute_error: 0.013967, acc: 0.996865, mean_q: 1.000000 917746/1000000: episode: 1439, duration: 5.107s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 918373/1000000: episode: 1440, duration: 5.110s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 919004/1000000: episode: 1441, duration: 5.103s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 919632/1000000: episode: 1442, duration: 5.094s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 920265/1000000: episode: 1443, duration: 5.116s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 920891/1000000: episode: 1444, duration: 5.066s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, 
mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 921511/1000000: episode: 1445, duration: 5.087s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000 922132/1000000: episode: 1446, duration: 5.247s, episode steps: 621, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 922757/1000000: episode: 1447, duration: 5.114s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 923385/1000000: episode: 1448, duration: 5.237s, episode steps: 628, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 924022/1000000: episode: 1449, duration: 5.251s, episode steps: 637, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000 924654/1000000: episode: 1450, duration: 5.176s, episode steps: 632, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 925477/1000000: episode: 1451, duration: 6.716s, episode steps: 823, steps per 
second: 123, episode reward: 240.000, mean reward: 0.292 [0.000, 50.000], mean action: 7.991 [1.000, 8.000], mean observation: 72.800 [0.000, 228.000], loss: 2.675342, mean_absolute_error: 0.034034, acc: 0.997567, mean_q: 1.000000 926117/1000000: episode: 1452, duration: 5.475s, episode steps: 640, steps per second: 117, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 926738/1000000: episode: 1453, duration: 5.389s, episode steps: 621, steps per second: 115, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 927373/1000000: episode: 1454, duration: 5.585s, episode steps: 635, steps per second: 114, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 928007/1000000: episode: 1455, duration: 5.203s, episode steps: 634, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 928637/1000000: episode: 1456, duration: 5.373s, episode steps: 630, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 929266/1000000: episode: 1457, duration: 5.329s, episode steps: 629, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 
228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 929896/1000000: episode: 1458, duration: 5.095s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 930530/1000000: episode: 1459, duration: 5.135s, episode steps: 634, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 931161/1000000: episode: 1460, duration: 5.116s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000 931783/1000000: episode: 1461, duration: 5.121s, episode steps: 622, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 932405/1000000: episode: 1462, duration: 5.046s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 933029/1000000: episode: 1463, duration: 5.058s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 933665/1000000: episode: 1464, duration: 5.144s, episode 
steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 934290/1000000: episode: 1465, duration: 5.046s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 934920/1000000: episode: 1466, duration: 5.092s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 935540/1000000: episode: 1467, duration: 5.011s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 936177/1000000: episode: 1468, duration: 5.193s, episode steps: 637, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000 936801/1000000: episode: 1469, duration: 5.278s, episode steps: 624, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 937415/1000000: episode: 1470, duration: 4.973s, episode steps: 614, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.888 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000 938042/1000000: episode: 1471, duration: 5.071s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000 938673/1000000: episode: 1472, duration: 5.109s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 939307/1000000: episode: 1473, duration: 5.135s, episode steps: 634, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000 939929/1000000: episode: 1474, duration: 5.260s, episode steps: 622, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 940787/1000000: episode: 1475, duration: 7.108s, episode steps: 858, steps per second: 121, episode reward: 440.000, mean reward: 0.513 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 25.900974, mean_absolute_error: 0.058617, acc: 0.997666, mean_q: 1.000000 941407/1000000: episode: 1476, duration: 5.145s, episode steps: 620, steps per second: 120, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 942034/1000000: episode: 
1477, duration: 5.198s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 942666/1000000: episode: 1478, duration: 5.215s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 943288/1000000: episode: 1479, duration: 5.114s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 943908/1000000: episode: 1480, duration: 5.107s, episode steps: 620, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 944549/1000000: episode: 1481, duration: 5.290s, episode steps: 641, steps per second: 121, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000 945171/1000000: episode: 1482, duration: 5.111s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 945802/1000000: episode: 1483, duration: 5.186s, episode steps: 631, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 
[8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 946441/1000000: episode: 1484, duration: 5.245s, episode steps: 639, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 947064/1000000: episode: 1485, duration: 5.128s, episode steps: 623, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 947684/1000000: episode: 1486, duration: 5.095s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000 948309/1000000: episode: 1487, duration: 5.144s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 948927/1000000: episode: 1488, duration: 5.026s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 949555/1000000: episode: 1489, duration: 5.095s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 
950183/1000000: episode: 1490, duration: 5.107s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 950796/1000000: episode: 1491, duration: 4.963s, episode steps: 613, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000 951424/1000000: episode: 1492, duration: 5.080s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 952061/1000000: episode: 1493, duration: 5.157s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000 952697/1000000: episode: 1494, duration: 5.138s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 953327/1000000: episode: 1495, duration: 5.098s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 953960/1000000: episode: 1496, duration: 5.113s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 954588/1000000: episode: 1497, duration: 5.081s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 955207/1000000: episode: 1498, duration: 5.007s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 955822/1000000: episode: 1499, duration: 4.979s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000 956443/1000000: episode: 1500, duration: 5.126s, episode steps: 621, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 957074/1000000: episode: 1501, duration: 5.222s, episode steps: 631, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 957711/1000000: episode: 1502, duration: 5.283s, episode steps: 637, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 
0.998428, mean_q: 1.000000 958328/1000000: episode: 1503, duration: 5.092s, episode steps: 617, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000 958956/1000000: episode: 1504, duration: 5.188s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000 959580/1000000: episode: 1505, duration: 5.167s, episode steps: 624, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 960216/1000000: episode: 1506, duration: 5.244s, episode steps: 636, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000 960838/1000000: episode: 1507, duration: 5.127s, episode steps: 622, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 961471/1000000: episode: 1508, duration: 5.215s, episode steps: 633, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 962105/1000000: episode: 1509, duration: 5.234s, episode steps: 634, steps per second: 121, episode reward: 70.000, 
mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000 962723/1000000: episode: 1510, duration: 5.115s, episode steps: 618, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000 963350/1000000: episode: 1511, duration: 5.193s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 963973/1000000: episode: 1512, duration: 5.141s, episode steps: 623, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000 964595/1000000: episode: 1513, duration: 5.168s, episode steps: 622, steps per second: 120, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 965223/1000000: episode: 1514, duration: 5.175s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 965852/1000000: episode: 1515, duration: 5.163s, episode steps: 629, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, 
mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 966479/1000000: episode: 1516, duration: 5.169s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 967108/1000000: episode: 1517, duration: 5.147s, episode steps: 629, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000 967748/1000000: episode: 1518, duration: 5.173s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000 968378/1000000: episode: 1519, duration: 5.378s, episode steps: 630, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 969013/1000000: episode: 1520, duration: 5.241s, episode steps: 635, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 969630/1000000: episode: 1521, duration: 5.096s, episode steps: 617, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 970255/1000000: episode: 1522, duration: 5.176s, episode steps: 625, steps per 
second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 970880/1000000: episode: 1523, duration: 5.147s, episode steps: 625, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000 971518/1000000: episode: 1524, duration: 5.242s, episode steps: 638, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 972135/1000000: episode: 1525, duration: 5.084s, episode steps: 617, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 972754/1000000: episode: 1526, duration: 5.090s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 973384/1000000: episode: 1527, duration: 5.171s, episode steps: 630, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000 974008/1000000: episode: 1528, duration: 5.156s, episode steps: 624, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 
228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 974635/1000000: episode: 1529, duration: 5.168s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 975261/1000000: episode: 1530, duration: 5.171s, episode steps: 626, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 975882/1000000: episode: 1531, duration: 5.120s, episode steps: 621, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 976504/1000000: episode: 1532, duration: 5.134s, episode steps: 622, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000 977142/1000000: episode: 1533, duration: 5.247s, episode steps: 638, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 977768/1000000: episode: 1534, duration: 5.139s, episode steps: 626, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 978395/1000000: episode: 1535, duration: 5.171s, episode 
steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 979019/1000000: episode: 1536, duration: 5.160s, episode steps: 624, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 979649/1000000: episode: 1537, duration: 5.191s, episode steps: 630, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000 980261/1000000: episode: 1538, duration: 5.061s, episode steps: 612, steps per second: 121, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000 980880/1000000: episode: 1539, duration: 5.077s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 981501/1000000: episode: 1540, duration: 5.114s, episode steps: 621, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000 982139/1000000: episode: 1541, duration: 5.248s, episode steps: 638, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean 
observation: 72.887 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000 982768/1000000: episode: 1542, duration: 5.177s, episode steps: 629, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 983407/1000000: episode: 1543, duration: 5.293s, episode steps: 639, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000 984038/1000000: episode: 1544, duration: 5.189s, episode steps: 631, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000 984670/1000000: episode: 1545, duration: 5.240s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 985291/1000000: episode: 1546, duration: 5.565s, episode steps: 621, steps per second: 112, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 985920/1000000: episode: 1547, duration: 5.219s, episode steps: 629, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 986553/1000000: episode: 
1548, duration: 5.261s, episode steps: 633, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 987181/1000000: episode: 1549, duration: 5.098s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 987813/1000000: episode: 1550, duration: 5.126s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 988434/1000000: episode: 1551, duration: 5.033s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 989062/1000000: episode: 1552, duration: 5.072s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 989697/1000000: episode: 1553, duration: 5.121s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 990316/1000000: episode: 1554, duration: 5.016s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 
[8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000 990949/1000000: episode: 1555, duration: 5.125s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 991577/1000000: episode: 1556, duration: 5.152s, episode steps: 628, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 992198/1000000: episode: 1557, duration: 5.175s, episode steps: 621, steps per second: 120, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 992815/1000000: episode: 1558, duration: 5.014s, episode steps: 617, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000 993443/1000000: episode: 1559, duration: 5.096s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 994072/1000000: episode: 1560, duration: 5.097s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000 
994696/1000000: episode: 1561, duration: 5.043s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000 995317/1000000: episode: 1562, duration: 5.024s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 995952/1000000: episode: 1563, duration: 5.129s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000 996573/1000000: episode: 1564, duration: 5.026s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000 997199/1000000: episode: 1565, duration: 5.080s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000 997831/1000000: episode: 1566, duration: 5.106s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000 998458/1000000: episode: 1567, duration: 5.075s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 
10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000 999086/1000000: episode: 1568, duration: 5.077s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000 999719/1000000: episode: 1569, duration: 5.117s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000 done, took 8629.718 seconds Testing for 10 episodes ... Episode 1: reward: 70.000, steps: 628 Episode 2: reward: 70.000, steps: 638 Episode 3: reward: 70.000, steps: 628 Episode 4: reward: 70.000, steps: 630 Episode 5: reward: 70.000, steps: 625 Episode 6: reward: 70.000, steps: 630 Episode 7: reward: 70.000, steps: 618 Episode 8: reward: 70.000, steps: 634 Episode 9: reward: 70.000, steps: 624 Episode 10: reward: 70.000, steps: 633
<keras.callbacks.History at 0x142ebb908>
## Plot loss and accuracy for more episodes in order to see if the reward increases
## The reward seems to be the same but the loss function gets smaller

# Per-episode training loss hand-transcribed from the SARSA/Boltzmann run logged above.
# Most episodes sit near ~0.55; the large spikes (e.g. 109.56, 121.95) correspond to
# the rare episodes where a big reward (eating a ghost) was encountered.
loss_Sarsa_Bolzman = [2.938, 23.077, 2.101, 1.326, 0.585, 0.585, 3.030, 28.383, 0.573,
                      0.566, 0.571, 0.579, 0.571, 0.566, 109.56, 0.5706, 0.5656, 0.564,
                      0.704, 0.556, 0.868, 0.568, 0.579, 0.571, 0.574, 0.561, 0.713, 0.559,
                      0.553, 0.568, 0.567, 0.557, 0.573, 0.735, 0.562, 0.566, 0.550, 0.565,
                      3.034, 0.565, 23.642, 0.869, 20.407, 27.008, 0.562, 0.555, 0.561, 0.558,
                      0.570, 0.553, 0.565, 0.641, 0.568, 0.572, 0.571, 121.95, 0.559, 118.69,
                      0.556, 0.558, 0.556, 0.560, 0.562, 0.565, 0.567, 18.003, 0.565, 109.29,
                      0.561, 0.572, 2.371, 0.573, 0.555, 0.556, 0.569, 0.557, 0.555, 0.563, 0.740,
                      0.556, 0.553, 0.734, 0.562, 0.559, 2.77, 0.557, 101.28, 0.554, 1.088, 0.568,
                      0.880, 0.570, 0.555, 0.559, 0.557, 0.561]
# Derive the x-axis from the data itself so the two can never fall out of sync
# (the previous hard-coded range(0, 96) had to be kept in step by hand).
episodes = list(range(len(loss_Sarsa_Bolzman)))
plt.plot(episodes, loss_Sarsa_Bolzman, 'r--')
# Clip both axes at 110 so the handful of huge loss spikes don't flatten the
# near-constant ~0.55 baseline into an unreadable line.
plt.axis([0, 110, 0, 110])
plt.show()
# Build the second (wider) network: the same alternating architecture as the
# first model, but with 100x the units per hidden layer.  Each hidden layer is
# followed by a Dense(nb_actions) bottleneck, and the final softmax turns the
# last nb_actions-wide layer into an action distribution.
model2 = Sequential()
model2.add(Flatten(input_shape=(1,) + env.observation_space.shape))
# (units, activation) for each hidden layer, in order.
hidden_layers = [
    (300, 'tanh'),     # layer 1: 300 units, tanh
    (600, 'sigmoid'),  # layer 2: 600 units, sigmoid
    (600, 'sigmoid'),  # layer 3: 600 units, sigmoid
    (600, 'sigmoid'),  # layer 4: 600 units, sigmoid
    (300, 'tanh'),     # layer 5: 300 units, tanh
    (300, 'sigmoid'),  # layer 6: 300 units, sigmoid
]
for units, activation in hidden_layers:
    model2.add(Dense(units, activation=activation))
    model2.add(Dense(nb_actions))
model2.add(Activation('softmax'))  # probability over the 9 Pac-Man actions
print(model2.summary())
_________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_1 (Flatten) (None, 100800) 0 _________________________________________________________________ dense_1 (Dense) (None, 300) 30240300 _________________________________________________________________ dense_2 (Dense) (None, 9) 2709 _________________________________________________________________ dense_3 (Dense) (None, 600) 6000 _________________________________________________________________ dense_4 (Dense) (None, 9) 5409 _________________________________________________________________ dense_5 (Dense) (None, 600) 6000 _________________________________________________________________ dense_6 (Dense) (None, 9) 5409 _________________________________________________________________ dense_7 (Dense) (None, 600) 6000 _________________________________________________________________ dense_8 (Dense) (None, 9) 5409 _________________________________________________________________ dense_9 (Dense) (None, 300) 3000 _________________________________________________________________ dense_10 (Dense) (None, 9) 2709 _________________________________________________________________ dense_11 (Dense) (None, 300) 3000 _________________________________________________________________ dense_12 (Dense) (None, 9) 2709 _________________________________________________________________ activation_1 (Activation) (None, 9) 0 ================================================================= Total params: 30,288,654 Trainable params: 30,288,654 Non-trainable params: 0 _________________________________________________________________ None
# DQN -- Deep Reinforcement Learning: configure and compile the agent.
#
# Exploration: eps-greedy action selection, with eps annealed linearly from
# 1.0 down to 0.1 over 1M steps, so the agent explores broadly at first and
# then gradually exploits what it has learned.  value_test=0.05 keeps a small
# amount of randomness at evaluation time so the agent cannot get stuck.
memory = SequentialMemory(limit=1000000, window_length=1)
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.0,
    value_min=0.1,
    value_test=0.05,
    nb_steps=1000000,
)
dqn2 = DQNAgent(
    model=model2,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=50,
    target_model_update=1e-2,
    policy=policy,
)
dqn2.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])
import os.path

# Resume training from a previous checkpoint when one exists on disk;
# otherwise start from the freshly-initialized weights.
checkpoint_path = 'dqn_backup_weights.h5f'
if os.path.exists(checkpoint_path):
    dqn2.load_weights(checkpoint_path)
from rl.callbacks import Callback


class Saver(Callback):
    """Checkpoint the agent's weights at the end of training episodes.

    keras-rl calls ``on_episode_end`` after every episode; saving there lets a
    long training run be resumed after an interruption (see the
    ``load_weights`` guard earlier in the file).
    """

    def __init__(self, filepath='dqn_backup_weights.h5f', interval=1):
        super().__init__()
        # Save to `filepath` every `interval` episodes; the defaults preserve
        # the original behavior (save every episode to the backup file).
        self.filepath = filepath
        self.interval = interval

    # `logs=None` instead of the original mutable-default `logs={}`, which is
    # shared across calls; `logs` is unused here but part of the callback API.
    def on_episode_end(self, episode, logs=None):
        print('episode callback')
        # The original tested `episode % 1 == 0`, which is always true;
        # `interval` makes the cadence configurable with the same default.
        if episode % self.interval == 0:
            self.model.save_weights(self.filepath, overwrite=True)


s = Saver()
# Train for 100k steps, checkpointing each episode via the Saver callback;
# rendering is disabled to keep training throughput up.
dqn2.fit(env, nb_steps=100000, visualize=False, verbose=2, callbacks=[s])
#dqn2.test(env, nb_episodes=10, visualize=True)
Training for 100000 steps ... episode callback 658/100000: episode: 1, duration: 336.758s, episode steps: 658, steps per second: 2, episode reward: 290.000, mean reward: 0.441 [0.000, 10.000], mean action: 3.857 [0.000, 8.000], mean observation: 72.810 [0.000, 228.000], loss: 2.103166, mean_absolute_error: 0.148895, acc: 0.111409, mean_q: 0.205358, mean_eps: 0.999681 episode callback 1086/100000: episode: 2, duration: 237.365s, episode steps: 428, steps per second: 2, episode reward: 70.000, mean reward: 0.164 [0.000, 10.000], mean action: 3.928 [0.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 2.026317, mean_absolute_error: 0.149495, acc: 0.114559, mean_q: 0.213103, mean_eps: 0.999216 episode callback 1718/100000: episode: 3, duration: 342.532s, episode steps: 632, steps per second: 2, episode reward: 190.000, mean reward: 0.301 [0.000, 10.000], mean action: 4.198 [0.000, 8.000], mean observation: 72.871 [0.000, 228.000], loss: 1.937029, mean_absolute_error: 0.150381, acc: 0.109573, mean_q: 0.250200, mean_eps: 0.998739 episode callback 2308/100000: episode: 4, duration: 324.798s, episode steps: 590, steps per second: 2, episode reward: 150.000, mean reward: 0.254 [0.000, 10.000], mean action: 3.812 [0.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 2.116763, mean_absolute_error: 0.149315, acc: 0.116102, mean_q: 0.224220, mean_eps: 0.998189 episode callback 2978/100000: episode: 5, duration: 362.901s, episode steps: 670, steps per second: 2, episode reward: 320.000, mean reward: 0.478 [0.000, 10.000], mean action: 3.972 [0.000, 8.000], mean observation: 72.779 [0.000, 228.000], loss: 1.991054, mean_absolute_error: 0.156376, acc: 0.113666, mean_q: 0.301945, mean_eps: 0.997622 episode callback 4026/100000: episode: 6, duration: 568.897s, episode steps: 1048, steps per second: 2, episode reward: 310.000, mean reward: 0.296 [0.000, 10.000], mean action: 4.072 [0.000, 8.000], mean observation: 72.807 [0.000, 228.000], loss: 1.831619, 
mean_absolute_error: 0.148026, acc: 0.114206, mean_q: 0.219604, mean_eps: 0.996849
# Longer run: one million training steps with on-screen rendering, followed
# by a 10-episode evaluation pass.
# NOTE(review): visualize=True renders every frame and slows training
# considerably — consider False for long runs.
dqn2.fit(env, visualize=True, verbose=2, nb_steps=1000000)
dqn2.test(env, visualize=True, nb_episodes=10)
Training for 1000000 steps ... 654/1000000: episode: 1, duration: 317.012s, episode steps: 654, steps per second: 2, episode reward: 120.000, mean reward: 0.183 [0.000, 10.000], mean action: 3.887 [0.000, 8.000], mean observation: 72.928 [0.000, 228.000], loss: 1.448611, mean_absolute_error: 0.186551, acc: 0.101472, mean_q: 0.763487, mean_eps: 0.999683 1336/1000000: episode: 2, duration: 352.722s, episode steps: 682, steps per second: 2, episode reward: 260.000, mean reward: 0.381 [0.000, 10.000], mean action: 3.708 [0.000, 8.000], mean observation: 72.801 [0.000, 228.000], loss: 1.389616, mean_absolute_error: 0.164051, acc: 0.099203, mean_q: 0.545559, mean_eps: 0.999105 2407/1000000: episode: 3, duration: 548.344s, episode steps: 1071, steps per second: 2, episode reward: 710.000, mean reward: 0.663 [0.000, 200.000], mean action: 4.073 [0.000, 8.000], mean observation: 72.658 [0.000, 228.000], loss: 10.424305, mean_absolute_error: 0.192876, acc: 0.112803, mean_q: 0.673903, mean_eps: 0.998316 3048/1000000: episode: 4, duration: 336.784s, episode steps: 641, steps per second: 2, episode reward: 220.000, mean reward: 0.343 [0.000, 10.000], mean action: 4.105 [0.000, 8.000], mean observation: 72.845 [0.000, 228.000], loss: 6.729588, mean_absolute_error: 0.240661, acc: 0.109887, mean_q: 0.999652, mean_eps: 0.997546 3740/1000000: episode: 5, duration: 361.328s, episode steps: 692, steps per second: 2, episode reward: 190.000, mean reward: 0.275 [0.000, 10.000], mean action: 3.990 [0.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 6.518019, mean_absolute_error: 0.241126, acc: 0.111949, mean_q: 0.994125, mean_eps: 0.996946 4359/1000000: episode: 6, duration: 302.134s, episode steps: 619, steps per second: 2, episode reward: 230.000, mean reward: 0.372 [0.000, 10.000], mean action: 3.974 [0.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 8.690007, mean_absolute_error: 0.241739, acc: 0.112126, mean_q: 0.999973, mean_eps: 0.996356 4917/1000000: 
episode: 7, duration: 283.394s, episode steps: 558, steps per second: 2, episode reward: 220.000, mean reward: 0.394 [0.000, 10.000], mean action: 4.077 [0.000, 8.000], mean observation: 72.862 [0.000, 228.000], loss: 10.461510, mean_absolute_error: 0.241632, acc: 0.119288, mean_q: 0.999966, mean_eps: 0.995826 5493/1000000: episode: 8, duration: 278.382s, episode steps: 576, steps per second: 2, episode reward: 150.000, mean reward: 0.260 [0.000, 10.000], mean action: 3.880 [0.000, 8.000], mean observation: 72.948 [0.000, 228.000], loss: 6.880867, mean_absolute_error: 0.238429, acc: 0.113878, mean_q: 0.986947, mean_eps: 0.995316 6099/1000000: episode: 9, duration: 302.298s, episode steps: 606, steps per second: 2, episode reward: 150.000, mean reward: 0.248 [0.000, 10.000], mean action: 4.140 [0.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 4.512041, mean_absolute_error: 0.235195, acc: 0.114377, mean_q: 1.000000, mean_eps: 0.994784 6743/1000000: episode: 10, duration: 338.222s, episode steps: 644, steps per second: 2, episode reward: 190.000, mean reward: 0.295 [0.000, 10.000], mean action: 4.057 [0.000, 8.000], mean observation: 72.852 [0.000, 228.000], loss: 7.434086, mean_absolute_error: 0.240786, acc: 0.110540, mean_q: 1.000000, mean_eps: 0.994222 7332/1000000: episode: 11, duration: 297.293s, episode steps: 589, steps per second: 2, episode reward: 240.000, mean reward: 0.407 [0.000, 10.000], mean action: 4.049 [0.000, 8.000], mean observation: 72.842 [0.000, 228.000], loss: 5.549702, mean_absolute_error: 0.235086, acc: 0.113487, mean_q: 1.000000, mean_eps: 0.993667 8202/1000000: episode: 12, duration: 449.472s, episode steps: 870, steps per second: 2, episode reward: 270.000, mean reward: 0.310 [0.000, 10.000], mean action: 3.962 [0.000, 8.000], mean observation: 72.817 [0.000, 228.000], loss: 6.081496, mean_absolute_error: 0.237461, acc: 0.110524, mean_q: 1.000000, mean_eps: 0.993010 9120/1000000: episode: 13, duration: 483.860s, episode 
steps: 918, steps per second: 2, episode reward: 380.000, mean reward: 0.414 [0.000, 10.000], mean action: 3.974 [0.000, 8.000], mean observation: 72.744 [0.000, 228.000], loss: 7.888651, mean_absolute_error: 0.237701, acc: 0.112337, mean_q: 1.000000, mean_eps: 0.992206 9915/1000000: episode: 14, duration: 415.255s, episode steps: 795, steps per second: 2, episode reward: 250.000, mean reward: 0.314 [0.000, 10.000], mean action: 4.072 [0.000, 8.000], mean observation: 72.777 [0.000, 228.000], loss: 4.046062, mean_absolute_error: 0.236618, acc: 0.108491, mean_q: 1.000000, mean_eps: 0.991435 10512/1000000: episode: 15, duration: 318.684s, episode steps: 597, steps per second: 2, episode reward: 200.000, mean reward: 0.335 [0.000, 10.000], mean action: 4.065 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 4.564064, mean_absolute_error: 0.235717, acc: 0.114374, mean_q: 1.000000, mean_eps: 0.990808 10987/1000000: episode: 16, duration: 251.281s, episode steps: 475, steps per second: 2, episode reward: 160.000, mean reward: 0.337 [0.000, 10.000], mean action: 3.882 [0.000, 8.000], mean observation: 72.839 [0.000, 228.000], loss: 7.823420, mean_absolute_error: 0.240771, acc: 0.113355, mean_q: 1.000000, mean_eps: 0.990326 11657/1000000: episode: 17, duration: 350.044s, episode steps: 670, steps per second: 2, episode reward: 170.000, mean reward: 0.254 [0.000, 10.000], mean action: 4.028 [0.000, 8.000], mean observation: 72.900 [0.000, 228.000], loss: 5.269891, mean_absolute_error: 0.234422, acc: 0.116698, mean_q: 1.000000, mean_eps: 0.989811 12244/1000000: episode: 18, duration: 307.721s, episode steps: 587, steps per second: 2, episode reward: 140.000, mean reward: 0.239 [0.000, 10.000], mean action: 3.951 [0.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 3.343591, mean_absolute_error: 0.232368, acc: 0.111584, mean_q: 1.000000, mean_eps: 0.989245 12849/1000000: episode: 19, duration: 314.754s, episode steps: 605, steps per second: 2, 
episode reward: 140.000, mean reward: 0.231 [0.000, 10.000], mean action: 4.187 [0.000, 8.000], mean observation: 72.924 [0.000, 228.000], loss: 5.313445, mean_absolute_error: 0.232026, acc: 0.118802, mean_q: 1.000000, mean_eps: 0.988709 13479/1000000: episode: 20, duration: 325.365s, episode steps: 630, steps per second: 2, episode reward: 200.000, mean reward: 0.317 [0.000, 10.000], mean action: 3.921 [0.000, 8.000], mean observation: 72.911 [0.000, 228.000], loss: 5.557315, mean_absolute_error: 0.235413, acc: 0.116319, mean_q: 1.000000, mean_eps: 0.988153