In [1]:

```
import numpy as no
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.agents.ddpg import DDPGAgent
from rl.policy import BoltzmannQPolicy , LinearAnnealedPolicy , EpsGreedyQPolicy
from rl.memory import SequentialMemory
```

In [2]:

```
ENV_NAME_2 = 'Asteroids-v0'
```

In [3]:

```
# Get the environment and extract the number of actions
env = gym.make(ENV_NAME_2)
nb_actions = env.action_space.n
nb_actions
```

Out[3]:

In [4]:

```
# Next, we build a neural network model
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(3, activation= 'tanh')) # One layer of 3 units with tanh activation function
model.add(Dense(nb_actions))
model.add(Activation('sigmoid')) # one layer of 1 unit with sigmoid activation function
print(model.summary())
```

In [5]:

```
#DQN -- Deep Reinforcement Learning
#Configure and compile the agent.
#Use every built-in Keras optimizer and metrics!
memory = SequentialMemory(limit=20000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])
```

In [6]:

```
## Visualize the training during 500000 steps
dqn.fit(env, nb_steps=500000, visualize=True, verbose=2)
```

Out[6]:

In [24]:

```
#Plot loss variations
import matplotlib.pyplot as plt
episodes = [1455,2423,3192,4037,4472,6292,7098,8897,
10784,11988,13541,14309,14855,15676,17303,18249,
21461,22917,23238,23586,26189,27369,28548,28919,
30695,32084,33770,35502,36281,37151,38717,39922,
41911,42709,43787,46754,48099,48561]
loss = [21.74,36.16,32.86,33.93,31.62,31.17,28.76,27.21,31.20,
30.66,28.269,28.651,25.91,29.79,31.83,33.02,30.15,28.89,
26.92,25.30,27.16,27.08,24.59,30.02,26.48,28.96,30.13,
29.65,27.24,28.61,27.87,27.72,26.7,27.76,27.96,30.41,32.34,25.04]
plt.plot(episodes, loss, 'r--')
plt.axis([0, 50000, 0, 40])
plt.show()
```

In [13]:

```
## Save the model
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME_2), overwrite=True)
```

In [14]:

```
# Evaluate the algorithm for 10 episodes
dqn.test(env, nb_episodes=10, visualize=True)
```

Out[14]:

In [25]:

```
### Another Policy with dqn
```

In [26]:

```
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=.8, value_min=.01,
value_test=.0,
nb_steps=100000)
dqn = DQNAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
policy=policy, test_policy=policy, memory = memory,
target_model_update=1e-2)
```

In [27]:

```
dqn.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])
```

In [28]:

```
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)
```

Out[28]:

In [31]:

```
episodes_p2 = [2647,5062,6988,7721,9006,9489,
10482,11303,12967,14767,17088,17370,17887,18599,19641,
20361,20921,22419,23198,24514,26366,27983,29873,
30851,31931,32370,34069,35248,36460,38501,39551,
40200,42374,43610]
loss_p2 = [29.99,29.21,30.04,26.03,26.04,26.78,27.92,32.33,25.37,26.68,26.14,28.39,
33.06,24.59,26.5,30.07,30.02,32.05,25.4,29.14,28.68,30.82, 30.10,31.20,
33.85,30.20,35.34,31.25,32.28,30.29,32.97,29.07,31.01,28.14]
plt.plot(episodes_p2, loss_p2, 'r--')
plt.axis([0, 50000, 0, 40])
plt.show()
```

In [ ]:

```
dqn.test(env, nb_episodes=10, visualize=True)
```

In [ ]:

```
#SARSA Agent -- Reinforcement Learning
from rl.agents.sarsa import SARSAAgent
sarsa = SARSAAgent(model, nb_actions,
policy=None, test_policy=None,
gamma=0.99, nb_steps_warmup=10,
train_interval=1)
sarsa.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])
sarsa.fit(env, nb_steps=50000, visualize=True, verbose=2)
sarsa.test(env, nb_episodes=10, visualize=True)
```