# 1.1 Model
## DESCRIPTION : 6-layer neural network with dropout
```python
from keras.models import Sequential
from keras.layers import Dense, Dropout

def create_model_1():
    model = Sequential()
    model.add(Dense(128, input_shape=(4,), activation='relu'))  # Layer 1: 128 units, relu activation
    model.add(Dropout(0.6))
    model.add(Dense(256, activation='relu'))                     # Layer 2: 256 units, relu activation
    model.add(Dropout(0.6))
    model.add(Dense(512, activation='relu'))                     # Layer 3: 512 units, relu activation
    model.add(Dropout(0.6))
    model.add(Dense(256, activation='relu'))                     # Layer 4: 256 units, relu activation
    model.add(Dropout(0.6))
    model.add(Dense(128, activation='relu'))                     # Layer 5: 128 units, relu activation
    model.add(Dropout(0.6))
    model.add(Dense(2, activation='softmax'))                    # Layer 6: softmax output layer (2 classes)
    model.compile(                                               # configure the learning process
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])
    print(model.summary())
    return model
```
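A minimal usage sketch is shown below; the randomly generated data, the 4 input features, and the two one-hot encoded classes are assumptions for illustration only.

```python
import numpy as np
from keras.utils import to_categorical

# Hypothetical dataset: 1000 samples, 4 features, 2 classes (illustration only).
X = np.random.rand(1000, 4)
y = to_categorical(np.random.randint(0, 2, size=1000), num_classes=2)

model = create_model_1()
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)
```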
# 1.2 Model
## DESCRIPTION : dqn_atari convolutional Q-network
```python
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute

def create_model_atari(input_shape, nb_actions):
    model_atari = Sequential()
    model_atari.add(Convolution2D(32, (8, 8), strides=(4, 4),
                                  input_shape=input_shape))      # Layer 1: 32 filters of size 8x8, stride 4
    model_atari.add(Activation('relu'))
    model_atari.add(Convolution2D(64, (4, 4), strides=(2, 2)))   # Layer 2: 64 filters of size 4x4, stride 2
    model_atari.add(Activation('relu'))
    model_atari.add(Convolution2D(64, (3, 3), strides=(1, 1)))   # Layer 3: 64 filters of size 3x3, stride 1
    model_atari.add(Activation('relu'))
    model_atari.add(Flatten())                                   # flatten the feature maps for the dense layers
    model_atari.add(Dense(512))                                  # Layer 4: fully connected, 512 units
    model_atari.add(Activation('relu'))
    model_atari.add(Dense(nb_actions))                           # Layer 5: one output per action (Q-values)
    model_atari.add(Activation('linear'))
    model_atari.compile(                                         # configure the learning process
        loss='mse',                                              # Q-values are regressed, so use a regression loss
        optimizer='adam',
        metrics=['mae'])
    return model_atari
```
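To make the output concrete, here is a small sketch of querying the network for Q-values and taking the greedy action; the 84x84x4 input shape (four stacked 84x84 greyscale frames), the 6 actions, and the random state are illustrative assumptions.

```python
import numpy as np

# Assumed setup: four stacked 84x84 greyscale frames and 6 discrete actions (illustration only).
q_network = create_model_atari(input_shape=(84, 84, 4), nb_actions=6)

state = np.random.rand(1, 84, 84, 4).astype('float32')  # one dummy preprocessed state
q_values = q_network.predict(state)                     # shape (1, 6): one Q-value per action
greedy_action = int(np.argmax(q_values[0]))             # the action a greedy policy would pick
```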
LinearAnnealedPolicy wraps another policy and decreases a given parameter linearly over time. (It can be used together with EpsGreedyQPolicy to anneal the eps value from 1 down to 0.1.)
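A minimal sketch of that combination, assuming keras-rl's rl.policy module (the 100,000-step annealing horizon and the value_test setting are illustrative assumptions):

```python
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

# Anneal the 'eps' attribute of the inner EpsGreedyQPolicy from 1.0 down to 0.1
# over 100000 training steps; value_test is the eps used during testing.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                              value_max=1.0, value_min=0.1,
                              value_test=0.05, nb_steps=100000)
```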
EpsGreedyQPolicy implements the epsilon-greedy policy, which selects random actions uniformly from the set of available actions. With probability epsilon it selects a random action, and with probability 1 - epsilon it selects the action with the highest expected reward (Q-value) in the given state. Its parameter is epsilon (eps), a probability between 0 and 1 that controls the exploration-exploitation trade-off.
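To make the selection rule concrete, here is a small standalone sketch of epsilon-greedy action selection over a vector of Q-values (a plain NumPy illustration, not the library implementation):

```python
import numpy as np

def eps_greedy_action(q_values, eps):
    """Pick a uniformly random action with probability eps,
    otherwise the action with the highest Q-value."""
    if np.random.rand() < eps:
        return np.random.randint(len(q_values))   # exploration
    return int(np.argmax(q_values))               # exploitation

# Example: with eps = 0.1 the best action is chosen ~90% of the time.
action = eps_greedy_action(np.array([0.2, 1.5, -0.3]), eps=0.1)
```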
Epsilon-greedy policy with the epsilon value set to 1, i.e. every action is selected at random.
Parameters --epsilon :
MaxBoltzmannQPolicy is a combination of the eps-greedy and Boltzmann Q-policy: with probability eps an action is sampled from the Boltzmann (softmax) distribution over the Q-values, and otherwise the greedy (maximum Q-value) action is selected.
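A standalone sketch of that selection rule (plain NumPy, with illustrative default values for eps and the temperature tau; not the library implementation itself):

```python
import numpy as np

def max_boltzmann_action(q_values, eps=0.1, tau=1.0):
    """With probability eps, sample from the Boltzmann (softmax) distribution
    over the Q-values scaled by the temperature tau; otherwise act greedily."""
    if np.random.rand() < eps:
        probs = np.exp(q_values / tau)
        probs /= probs.sum()
        return int(np.random.choice(len(q_values), p=probs))
    return int(np.argmax(q_values))
```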
BoltzmannGumbelQPolicy implements Boltzmann-Gumbel exploration (BGE). BGE is invariant with respect to the mean of the rewards but not their variance. The parameter C, which defaults to 1, can be used to correct for this, and should be set to the least upper bound on the standard deviation of the rewards. BGE is only available for training, not testing. For testing purposes, you can achieve approximately the same result as BGE after training for N steps on K actions with parameter C by using the BoltzmannQPolicy and setting tau = C/sqrt(N/K).
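As a worked sketch of that testing-time substitution, assuming keras-rl's rl.policy module (the values of C, N and K below are illustrative assumptions):

```python
import numpy as np
from rl.policy import BoltzmannGumbelQPolicy, BoltzmannQPolicy

C, N, K = 1.0, 500000, 4                    # assumed: reward std-dev bound, training steps, number of actions
train_policy = BoltzmannGumbelQPolicy(C=C)  # used during training only

tau = C / np.sqrt(N / K)                    # tau = C / sqrt(N / K) ~= 0.0028 here
test_policy = BoltzmannQPolicy(tau=tau)     # approximates BGE behaviour at test time
```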