-
-
Save iandanforth/e3ffb67cf3623153e968f2afdfb01dc8 to your computer and use it in GitHub Desktop.
""" | |
Classic cart-pole system implemented by Rich Sutton et al. | |
Copied from http://incompleteideas.net/sutton/book/code/pole.c | |
permalink: https://perma.cc/C9ZM-652R | |
Continuous version by Ian Danforth | |
""" | |
import math | |
import gym | |
from gym import spaces, logger | |
from gym.utils import seeding | |
import numpy as np | |
class ContinuousCartPoleEnv(gym.Env):
    """Cart-pole environment driven by a single continuous action.

    The action is one value in ``[min_action, max_action]``; it is multiplied
    by ``force_mag`` to obtain the horizontal force applied to the cart.
    The state/observation is ``(x, x_dot, theta, theta_dot)``.  An episode is
    reported as done once ``x`` leaves ``[-x_threshold, x_threshold]`` or
    ``theta`` leaves ``[-theta_threshold_radians, theta_threshold_radians]``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 50
    }

    def __init__(self):
        """Set the physical constants, the spaces and the initial bookkeeping."""
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.force_mag = 30.0
        self.tau = 0.02  # seconds between state updates
        self.min_action = -1.0
        self.max_action = 1.0

        # Angle at which to fail the episode (12 degrees, in radians)
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds
        high = np.array([
            self.x_threshold * 2,
            np.finfo(np.float32).max,
            self.theta_threshold_radians * 2,
            np.finfo(np.float32).max])

        # dtype is passed explicitly so Box does not have to guess it.
        self.action_space = spaces.Box(
            low=self.min_action,
            high=self.max_action,
            shape=(1,),
            dtype=np.float32
        )
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None
        # None until the episode ends; afterwards counts the extra step() calls.
        self.steps_beyond_done = None

    def seed(self, seed=None):
        """(Re)create ``self.np_random`` from ``seed`` and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def stepPhysics(self, force):
        """Return the state one time step ``tau`` after ``self.state``.

        ``force`` is the horizontal force applied to the cart.  ``self.state``
        is read but not modified; the new ``(x, x_dot, theta, theta_dot)``
        tuple is returned to the caller.
        """
        x, x_dot, theta, theta_dot = self.state
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (
            force + self.polemass_length * theta_dot * theta_dot * sintheta
        ) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length
            * (4.0 / 3.0 - self.masspole * costheta * costheta / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        # Positions advance with the velocities from *before* this step.
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
        return (x, x_dot, theta, theta_dot)

    def step(self, action):
        """Apply ``action`` for one time step.

        Returns ``(observation, reward, done, info)``.  The reward is 1.0 for
        every step up to and including the one on which the episode ends, and
        0.0 for any step taken after that.  ``info`` is always an empty dict.
        """
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))
        # .item() pulls the single element out as a plain Python scalar;
        # calling float() directly on a 1-element array is deprecated in
        # recent NumPy releases.
        force = self.force_mag * float(np.asarray(action).item())
        self.state = self.stepPhysics(force)
        x, x_dot, theta, theta_dot = self.state
        done = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # Warn only on the first step taken after the episode ended.
            if self.steps_beyond_done == 0:
                logger.warn("""
You are calling 'step()' even though this environment has already returned
done = True. You should always call 'reset()' once you receive 'done = True'
Any further steps are undefined behavior.
""")
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}

    def reset(self):
        """Draw a new state uniformly from [-0.05, 0.05) per component and return it."""
        self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.steps_beyond_done = None
        return np.array(self.state)

    def render(self, mode='human'):
        """Draw the cart, pole, axle and track for the current state.

        The viewer and its geometry are built lazily on the first call (and
        again after ``close()``).  Returns ``None`` when there is no state
        yet; otherwise returns the result of the viewer's ``render`` call,
        which is asked for an RGB array when ``mode == 'rgb_array'``.
        """
        screen_width = 600
        screen_height = 400

        world_width = self.x_threshold * 2
        scale = screen_width / world_width
        carty = 100  # TOP OF CART
        polewidth = 10.0
        polelen = scale * 1.0
        cartwidth = 50.0
        cartheight = 30.0

        if self.viewer is None:
            # Imported here so the rendering backend is only loaded when
            # something is actually drawn.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
            axleoffset = cartheight / 4.0
            cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            self.carttrans = rendering.Transform()
            cart.add_attr(self.carttrans)
            self.viewer.add_geom(cart)

            l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2
            pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            pole.set_color(.8, .6, .4)
            self.poletrans = rendering.Transform(translation=(0, axleoffset))
            # The pole gets its own transform plus the cart's, so it moves
            # with the cart while rotating about the axle.
            pole.add_attr(self.poletrans)
            pole.add_attr(self.carttrans)
            self.viewer.add_geom(pole)

            self.axle = rendering.make_circle(polewidth / 2)
            self.axle.add_attr(self.poletrans)
            self.axle.add_attr(self.carttrans)
            self.axle.set_color(.5, .5, .8)
            self.viewer.add_geom(self.axle)

            self.track = rendering.Line((0, carty), (screen_width, carty))
            self.track.set_color(0, 0, 0)
            self.viewer.add_geom(self.track)

        if self.state is None:
            return None

        state = self.state
        cartx = state[0] * scale + screen_width / 2.0  # MIDDLE OF CART
        self.carttrans.set_translation(cartx, carty)
        self.poletrans.set_rotation(-state[2])

        return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        if self.viewer:
            self.viewer.close()
            # Drop the reference so the next render() builds a fresh viewer
            # instead of drawing into the window that was just closed.
            self.viewer = None
Thanks for posting this! What have you tried for training with the continuous input?
Thanks for sharing. :)
Thanks for sharing!
Thank you!
Nice work. There is a small problem with rendering. When you run an episode and use the render option for the first time everything is fine, however, if you try to run it again, you get the following error:
in play_optimally(model, render)
10
11 if render is True:
---> 12 env.render()
13 time.sleep(0.1)
14
in render(self, mode)
148 self.poletrans.set_rotation(-x[2])
149
--> 150 return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
151
152 def close(self):
~\miniconda3\envs\Torch\lib\site-packages\gym\envs\classic_control\rendering.py in render(self, return_rgb_array)
101 glClearColor(1,1,1,1)
102 self.window.clear()
--> 103 self.window.switch_to()
104 self.window.dispatch_events()
105 self.transform.enable()
~\miniconda3\envs\Torch\lib\site-packages\pyglet\window\win32\__init__.py in switch_to(self)
326
327 def switch_to(self):
--> 328 self.context.set_current()
329
330 def flip(self):
AttributeError: 'NoneType' object has no attribute 'set_current'
Nice work. There is a small problem with rendering. When you run an episode and use the render option for the first time everything is fine, however, if you try to run it again, you get the following error:
in play_optimally(model, render)
10
11 if render is True:
---> 12 env.render()
13 time.sleep(0.1)
14
in render(self, mode)
148 self.poletrans.set_rotation(-x[2])
149
--> 150 return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
151
152 def close(self):
~\miniconda3\envs\Torch\lib\site-packages\gym\envs\classic_control\rendering.py in render(self, return_rgb_array)
101 glClearColor(1,1,1,1)
102 self.window.clear()
--> 103 self.window.switch_to()
104 self.window.dispatch_events()
105 self.transform.enable()
~\miniconda3\envs\Torch\lib\site-packages\pyglet\window\win32\__init__.py in switch_to(self)
326
327 def switch_to(self):
--> 328 self.context.set_current()
329
330 def flip(self):
AttributeError: 'NoneType' object has no attribute 'set_current'
Hello,
I was able to correct the error by copying and pasting the render and close functions from the original (discrete) repo: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py.
Hope it helps.
Best regards.
License please @iandanforth?
Thank you for the great work!
Thanks for sharing!
Revised imports for gymnasium:
import math
import gymnasium as gym
from gymnasium import spaces, logger
from gymnasium.utils import seeding
import numpy as np
Exactly the same as CartPole except that the action space is now continuous from -1 to 1.