added continuous mountain car v0 (openai#306)
* added continuous mountain car v0

* spotted that the action should be a vector, not a scalar

* fixed a bug in the action format

* fixed a bug in scoreboard registration

* the observation (aka state) should be a numpy array

* added comment on reward range as requested
osigaud authored and gdb committed Aug 24, 2016
1 parent 4b9984c commit c97551e
Showing 6 changed files with 175 additions and 1 deletion.
2 changes: 2 additions & 0 deletions gym/core.py
@@ -41,6 +41,8 @@ class Env(object):
        observation_space: The Space object corresponding to valid observations
        reward_range: A tuple corresponding to the min and max possible rewards
    Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.
    The methods are accessed publicly as "step", "reset", etc. The
    non-underscored versions are wrapper methods to which we may add
    functionality over time.
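As a hypothetical illustration (not part of this commit) of the attribute the note above refers to, a custom environment could narrow the default (-inf, +inf) range by overriding it on the subclass:

from gym import Env

class BoundedRewardEnv(Env):
    # Hypothetical example env: narrows the default reward_range to known bounds.
    reward_range = (-1.0, 100.0)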
7 changes: 7 additions & 0 deletions gym/envs/__init__.py
@@ -71,6 +71,13 @@
    reward_threshold=-110.0,
)

register(
    id='MountainCarContinuous-v0',
    entry_point='gym.envs.classic_control:Continuous_MountainCarEnv',
    timestep_limit=999,
    reward_threshold=90.0,
)

register(
    id='Pendulum-v0',
    entry_point='gym.envs.classic_control:PendulumEnv',
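A minimal usage sketch (editorial, not part of the commit) for the id registered above, assuming the 2016-era step() return signature of (observation, reward, done, info); the sampled action is only there to show that actions are length-1 arrays in [-1, 1]:

import gym

env = gym.make('MountainCarContinuous-v0')
observation = env.reset()
for _ in range(5):
    action = env.action_space.sample()                  # numpy array of shape (1,)
    observation, reward, done, info = env.step(action)
    if done:
        break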
1 change: 1 addition & 0 deletions gym/envs/classic_control/__init__.py
@@ -1,5 +1,6 @@
from gym.envs.classic_control.cartpole import CartPoleEnv
from gym.envs.classic_control.mountain_car import MountainCarEnv
from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
from gym.envs.classic_control.pendulum import PendulumEnv
from gym.envs.classic_control.acrobot import AcrobotEnv

144 changes: 144 additions & 0 deletions gym/envs/classic_control/continuous_mountain_car.py
@@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
"""
@author: Olivier Sigaud
A merge between two sources:
* Adaptation of the MountainCar Environment from the "FAReinforcement" library
of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia
* the OpenAI/gym MountainCar environment
itself from
https://webdocs.cs.ualberta.ca/~sutton/MountainCar/MountainCar1.cp
"""

import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np

class Continuous_MountainCarEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self):
        self.min_action = -1.0
        self.max_action = 1.0
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.45  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        self.power = 0.0015

        self.low_state = np.array([self.min_position, -self.max_speed])
        self.high_state = np.array([self.max_position, self.max_speed])

        self.viewer = None

        self.action_space = spaces.Box(self.min_action, self.max_action, shape=(1,))
        self.observation_space = spaces.Box(self.low_state, self.high_state)

        self._seed()
        self.reset()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        position = self.state[0]
        velocity = self.state[1]
        force = min(max(action[0], -1.0), 1.0)

        velocity += force * self.power - 0.0025 * math.cos(3 * position)
        if (velocity > self.max_speed): velocity = self.max_speed
        if (velocity < -self.max_speed): velocity = -self.max_speed
        position += velocity
        if (position > self.max_position): position = self.max_position
        if (position < self.min_position): position = self.min_position
        if (position == self.min_position and velocity < 0): velocity = 0

        done = bool(position >= self.goal_position)

        reward = 0
        if done:
            reward = 100.0
        reward -= math.pow(action[0], 2) * 0.1

        self.state = np.array([position, velocity])
        return self.state, reward, done, {}

    def _reset(self):
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return np.array(self.state)

    # def get_state(self):
    #     return self.state

    def _height(self, xs):
        return np.sin(3 * xs) * .45 + .55

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        screen_width = 600
        screen_height = 400

        world_width = self.max_position - self.min_position
        scale = screen_width / world_width
        carwidth = 40
        carheight = 20

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)
            xs = np.linspace(self.min_position, self.max_position, 100)
            ys = self._height(xs)
            xys = list(zip((xs - self.min_position) * scale, ys * scale))

            self.track = rendering.make_polyline(xys)
            self.track.set_linewidth(4)
            self.viewer.add_geom(self.track)

            clearance = 10

            l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
            car = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            car.add_attr(rendering.Transform(translation=(0, clearance)))
            self.cartrans = rendering.Transform()
            car.add_attr(self.cartrans)
            self.viewer.add_geom(car)
            frontwheel = rendering.make_circle(carheight / 2.5)
            frontwheel.set_color(.5, .5, .5)
            frontwheel.add_attr(rendering.Transform(translation=(carwidth / 4, clearance)))
            frontwheel.add_attr(self.cartrans)
            self.viewer.add_geom(frontwheel)
            backwheel = rendering.make_circle(carheight / 2.5)
            backwheel.add_attr(rendering.Transform(translation=(-carwidth / 4, clearance)))
            backwheel.add_attr(self.cartrans)
            backwheel.set_color(.5, .5, .5)
            self.viewer.add_geom(backwheel)
            flagx = (self.goal_position - self.min_position) * scale
            flagy1 = self._height(self.goal_position) * scale
            flagy2 = flagy1 + 50
            flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
            self.viewer.add_geom(flagpole)
            flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)])
            flag.set_color(.8, .8, 0)
            self.viewer.add_geom(flag)

        pos = self.state[0]
        self.cartrans.set_translation((pos - self.min_position) * scale, self._height(pos) * scale)
        self.cartrans.set_rotation(math.cos(3 * pos))

        return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
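As a quick end-to-end check of the class above, a rough rollout sketch (editorial, not part of the commit): it always pushes in the direction of the current velocity, a simple energy-pumping heuristic rather than a reference solution, and assumes the 2016-era public reset()/step() wrappers.

import numpy as np
from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv

env = Continuous_MountainCarEnv()
state = env.reset()                                      # np.array([position, velocity])
total_reward = 0.0
for t in range(999):                                     # matches the registered timestep_limit
    action = np.array([1.0 if state[1] >= 0 else -1.0])  # push along the current velocity
    state, reward, done, _ = env.step(action)            # +100 at the goal, -0.1 * action^2 per step
    total_reward += reward
    if done:
        break
print(total_reward)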
2 changes: 1 addition & 1 deletion gym/envs/classic_control/pendulum.py
@@ -34,8 +34,8 @@ def _step(self,u):
        l = 1.
        dt = self.dt

        self.last_u = u # for rendering
        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u # for rendering
        costs = angle_normalize(th)**2 + .1*thdot**2 + .001*(u**2)

        newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*u) * dt
20 changes: 20 additions & 0 deletions gym/scoreboard/__init__.py
@@ -161,6 +161,26 @@
""",
)

add_task(
    id='MountainCarContinuous-v0',
    group='classic_control',
    summary="Drive up a big hill with continuous control.",
    description="""
A car is on a one-dimensional track,
positioned between two "mountains".
The goal is to drive up the mountain on the right; however, the car's engine is not
strong enough to scale the mountain in a single pass.
Therefore, the only way to succeed is to drive back and forth to build up momentum.
Here, the reward is greater if you spend less energy to reach the goal.
""",
    background="""\
This problem was first described by Andrew Moore in his PhD thesis [Moore90]_.
.. [Moore90] A. Moore, Efficient Memory-Based Learning for Robot Control, PhD thesis, University of Cambridge, 1990.
This is the continuous version of the task.
""",
)
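A back-of-the-envelope reading (editorial, not from the commit) of the reward_threshold=90.0 registered earlier: reaching the goal pays +100 and every step subtracts 0.1 * action[0]**2, so clearing 90 means reaching the flag while the accumulated squared action stays at or below 100, roughly a hundred full-throttle pushes within the 999-step limit.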

add_task(
    id='Pendulum-v0',
    group='classic_control',
