import functools
import matplotlib
from matplotlib import pyplot
import numpy as np
import sys
sys.path.append("..")
from hiora_cartpole import features
from hiora_cartpole import fourier_fa
from hiora_cartpole import easytile_fa
from hiora_cartpole import driver
import gym_ext.tools as gym_tools
import gym
# Create the CartPole environment and define the observation ranges that
# are handed to the feature constructors and the observation clipper below.
env = gym.make('CartPole-v0')

# Hand-picked symmetric upper bounds, one entry per observation component.
# NOTE(review): these replace env.observation_space.high, which used to be
# read here and was immediately overwritten -- presumably because the
# environment's own bounds are too wide to be useful; confirm.
clipped_high = np.array([2.5, 3.6, 0.27, 3.7])
clipped_low = -clipped_high

# Row 0 holds the lower bounds, row 1 the upper bounds.
state_ranges = np.array([clipped_low, clipped_high])
[2016-09-23 13:07:02,758] Making new env: CartPole-v0
# Build the easytile feature function over the clipped state ranges.
# NOTE(review): the positional arguments are presumably the number of
# actions (2), the tiles per state dimension ([5, 7, 5, 7]) and the number
# of tilings (8) -- confirm against easytile_fa.make_feature_vec.
(easyt_n_weights,
 easyt_feature_vec) = easytile_fa.make_feature_vec(
    state_ranges,
    2,
    [5, 7, 5, 7],
    8,
)
# Leftover debugging snippet; it refers to names that are not defined here:
# fv = feature_vec(cartpole.observation_space.sample(), cartpole.action_space.sample())

from hiora_cartpole import linfa

# Observation mapping handed to the learner: gym_tools.warning_clip_obs
# with its `ranges` argument bound to the hand-picked state ranges.
easyt_map_obs = functools.partial(gym_tools.warning_clip_obs,
                                  ranges=state_ranges)

# Fresh learner state for the easytile features (theta=None: no initial
# weights are supplied).
eexperience = linfa.init(
    lmbda=0.9,
    init_alpha=0.05,
    epsi=0.01,
    feature_vec=easyt_feature_vec,
    n_weights=easyt_n_weights,
    act_space=env.action_space,
    theta=None,
    is_use_alpha_bounds=True,
    map_obs=easyt_map_obs,
)

# Train for 700 episodes of at most 500 steps each, without rendering.
(eexperience,
 steps_per_episode,
 alpha_per_episode) = driver.train(
    env, linfa, eexperience,
    n_episodes=700, max_steps=500, is_render=False)
# Training curves on one figure with two y-scales: steps_per_episode in
# blue on the primary axes, alpha_per_episode in red on the twin axes.
# Credits: http://matplotlib.org/examples/api/two_scales.html
fig, ax1 = pyplot.subplots(nrows=1, ncols=1)
ax1.plot(steps_per_episode, color='b')
ax2 = ax1.twinx()
ax2.plot(alpha_per_episode, color='r')
pyplot.show()

# Run 15 rendered episodes (at most 600 steps each) with the trained
# easytile learner. Note that this rebinds steps_per_episode.
steps_per_episode = driver.exec_greedy(
    env, eexperience,
    n_episodes=15, max_steps=600, is_render=True)
%matplotlib notebook
# Plot the easytile learner's theta (presumably its weight vector).
pyplot.plot(eexperience.theta)
pyplot.show()

# Bare expression: in a notebook this displays the learner's p_feat.
eexperience.p_feat

# Continue training the same learner for 50 rendered episodes of at most
# 100 steps each; the per-episode statistics are rebound.
(eexperience,
 steps_per_episode,
 alpha_per_episode) = driver.train(
    env, linfa, eexperience,
    n_episodes=50, max_steps=100, is_render=True)
# Build the order-3 Fourier feature function for two actions over the
# clipped state ranges.
(four_n_weights,
 four_feature_vec) = fourier_fa.make_feature_vec(
    state_ranges, n_acts=2, order=3)
# Leftover debugging snippet; it refers to names that are not defined here:
# fv = feature_vec(cartpole.observation_space.sample(), cartpole.action_space.sample())

from hiora_cartpole import linfa

# Observation mapping handed to the learner: gym_tools.warning_clip_obs
# with its `ranges` argument bound to the hand-picked state ranges.
four_map_obs = functools.partial(gym_tools.warning_clip_obs,
                                 ranges=state_ranges)

# Fresh learner state for the Fourier features (theta=None: no initial
# weights are supplied).
fexperience = linfa.init(
    lmbda=0.9,
    init_alpha=0.001,
    epsi=0.1,
    feature_vec=four_feature_vec,
    n_weights=four_n_weights,
    act_space=env.action_space,
    theta=None,
    is_use_alpha_bounds=True,
    map_obs=four_map_obs,
)

# Train for 700 episodes of at most 500 steps each, without rendering.
(fexperience,
 steps_per_episode,
 alpha_per_episode) = driver.train(
    env, linfa, fexperience,
    n_episodes=700, max_steps=500, is_render=False)
# Training curves for the Fourier learner on one figure with two y-scales:
# steps_per_episode in blue on the primary axes, alpha_per_episode in red
# on the twin axes.
# Credits: http://matplotlib.org/examples/api/two_scales.html
fig, ax1 = pyplot.subplots(nrows=1, ncols=1)
ax1.plot(steps_per_episode, color='b')
ax2 = ax1.twinx()
ax2.plot(alpha_per_episode, color='r')
pyplot.show()
Haha, the number of steps per episode just explodes after 30 or so episodes, and the learner doesn't diverge, at least not before 700 episodes.
# Plot the Fourier learner's theta (presumably its weight vector).
pyplot.plot(fexperience.theta)
pyplot.show()

# Run 10 rendered episodes (at most 600 steps each) with the trained
# Fourier learner. Note that this rebinds steps_per_episode.
steps_per_episode = driver.exec_greedy(
    env, fexperience,
    n_episodes=10, max_steps=600, is_render=True)
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-100-4b4073f3b212> in <module>() ----> 1 steps_per_episode = driver.exec_greedy(env, fexperience, n_episodes=10, max_steps=600, is_render=True) /home/erle/repos/cartpole/hiora_cartpole/driver.py in exec_greedy(env, experience, n_episodes, max_steps, is_render) 84 85 for t in xrange(max_steps): ---> 86 is_render and env.render() # pylint: disable=expression-not-assigned 87 action = greedy_act(experience, observation) 88 observation, _, done, _ = env.step(action) /home/erle/.local/lib/python2.7/site-packages/gym/core.pyc in render(self, mode, close) 190 raise error.UnsupportedMode('Unsupported rendering mode: {}. (Supported modes for {}: {})'.format(mode, self, modes)) 191 --> 192 return self._render(mode=mode, close=close) 193 194 def close(self): /home/erle/.local/lib/python2.7/site-packages/gym/envs/classic_control/cartpole.pyc in _render(self, mode, close) 147 self.poletrans.set_rotation(-x[2]) 148 --> 149 return self.viewer.render(return_rgb_array = mode=='rgb_array') /home/erle/.local/lib/python2.7/site-packages/gym/envs/classic_control/rendering.pyc in render(self, return_rgb_array) 80 def render(self, return_rgb_array=False): 81 glClearColor(1,1,1,1) ---> 82 self.window.clear() 83 self.window.switch_to() 84 self.window.dispatch_events() /home/erle/.local/lib/python2.7/site-packages/pyglet/window/__init__.pyc in clear(self) 1149 buffer. The window must be the active context (see `switch_to`). 1150 ''' -> 1151 gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT) 1152 1153 def dispatch_event(self, *args): /home/erle/.local/lib/python2.7/site-packages/pyglet/gl/lib.pyc in errcheck(result, func, arguments) 82 pass 83 ---> 84 def errcheck(result, func, arguments): 85 if _debug_gl_trace: 86 try: KeyboardInterrupt:
# Build a separate order-3 Fourier feature function for two actions, used
# by the learner that selects actions with linfa.choose_action_Q.
(qfour_n_weights,
 qfour_feature_vec) = fourier_fa.make_feature_vec(
    state_ranges, n_acts=2, order=3)
# Leftover debugging snippet; it refers to names that are not defined here:
# fv = feature_vec(cartpole.observation_space.sample(), cartpole.action_space.sample())

from hiora_cartpole import linfa

# Observation mapping handed to the learner: gym_tools.warning_clip_obs
# with its `ranges` argument bound to the hand-picked state ranges.
qfour_map_obs = functools.partial(gym_tools.warning_clip_obs,
                                  ranges=state_ranges)

# Same hyperparameters as the previous Fourier learner; the only addition
# is the explicit choose_action callback.
qfexperience = linfa.init(
    lmbda=0.9,
    init_alpha=0.001,
    epsi=0.1,
    feature_vec=qfour_feature_vec,
    n_weights=qfour_n_weights,
    act_space=env.action_space,
    theta=None,
    is_use_alpha_bounds=True,
    map_obs=qfour_map_obs,
    choose_action=linfa.choose_action_Q,
)

# Train for 300 episodes of at most 500 steps each, without rendering.
(qfexperience,
 steps_per_episode,
 alpha_per_episode) = driver.train(
    env, linfa, qfexperience,
    n_episodes=300, max_steps=500, is_render=False)
# Training curves for the choose_action_Q learner on one figure with two
# y-scales: steps_per_episode in blue on the primary axes,
# alpha_per_episode in red on the twin axes.
# Credits: http://matplotlib.org/examples/api/two_scales.html
fig, ax1 = pyplot.subplots(nrows=1, ncols=1)
ax1.plot(steps_per_episode, color='b')
ax2 = ax1.twinx()
ax2.plot(alpha_per_episode, color='r')
pyplot.show()

# Run 10 rendered episodes (at most 600 steps each) with the trained
# learner. Note that this rebinds steps_per_episode.
steps_per_episode = driver.exec_greedy(
    env, qfexperience,
    n_episodes=10, max_steps=600, is_render=True)

# Plot the learner's theta (presumably its weight vector) on a new figure.
fig, ax = pyplot.subplots(nrows=1, ncols=1)
ax.plot(qfexperience.theta)
pyplot.show()