forked from openai/gym
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added continuous mountain car v0 (openai#306)
* added continuous mountain car v0 * spotted that the action should be a vector, not a scalar * fixed bug on action format * bug fixed in scoreboard registering * the observation (aka state) should be a numpy array * added comment on reward range as requested
- Loading branch information
Showing
6 changed files
with
175 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from gym.envs.classic_control.cartpole import CartPoleEnv | ||
from gym.envs.classic_control.mountain_car import MountainCarEnv | ||
from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv | ||
from gym.envs.classic_control.pendulum import PendulumEnv | ||
from gym.envs.classic_control.acrobot import AcrobotEnv | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
@author: Olivier Sigaud | ||
A merge between two sources: | ||
* Adaptation of the MountainCar Environment from the "FAReinforcement" library | ||
of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch' | ||
and then modified by Arnaud de Broissia | ||
* the OpenAI/gym MountainCar environment | ||
itself from | ||
https://webdocs.cs.ualberta.ca/~sutton/MountainCar/MountainCar1.cp | ||
""" | ||
|
||
import math | ||
import gym | ||
from gym import spaces | ||
from gym.utils import seeding | ||
import numpy as np | ||
|
||
class Continuous_MountainCarEnv(gym.Env):
    """Mountain-car task with a continuous, one-dimensional action.

    The state is the pair ``[position, velocity]``.  The position is kept
    inside ``[min_position, max_position]`` and the velocity inside
    ``[-max_speed, max_speed]``.  An episode is finished as soon as the
    position reaches ``goal_position``.

    The action is a length-1 vector whose single entry is the force applied
    to the car; it is clamped to ``[min_action, max_action]`` before use.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self):
        """Set the physical constants, build the spaces, seed and reset."""
        self.min_action = -1.0
        self.max_action = 1.0
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        self.goal_position = 0.45
        # Scale factor turning the (clamped) action into an acceleration.
        self.power = 0.0015

        self.low_state = np.array([self.min_position, -self.max_speed])
        self.high_state = np.array([self.max_position, self.max_speed])

        # Created lazily on the first call to ``_render``.
        self.viewer = None

        self.action_space = spaces.Box(self.min_action, self.max_action, shape=(1,))
        self.observation_space = spaces.Box(self.low_state, self.high_state)

        self._seed()
        self.reset()

    def _seed(self, seed=None):
        """Create the environment's random generator.

        Args:
            seed: value forwarded to ``seeding.np_random``; ``None`` by
                default.

        Returns:
            A one-element list holding the seed reported by
            ``seeding.np_random``.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """Advance the simulation by one time step.

        Args:
            action: indexable whose first element is the requested force.

        Returns:
            A tuple ``(observation, reward, done, info)`` where
            ``observation`` is a fresh ``[position, velocity]`` array,
            ``reward`` is ``100.0`` on reaching the goal (``0`` otherwise)
            minus ``0.1 * action[0] ** 2``, ``done`` is a ``bool`` and
            ``info`` is an empty dict.
        """
        position = self.state[0]
        velocity = self.state[1]
        # Clamp with the same bounds that define ``action_space`` so the two
        # cannot drift apart if the limits are ever changed.
        force = min(max(action[0], self.min_action), self.max_action)

        velocity += force * self.power - 0.0025 * math.cos(3 * position)
        if velocity > self.max_speed:
            velocity = self.max_speed
        if velocity < -self.max_speed:
            velocity = -self.max_speed
        position += velocity
        if position > self.max_position:
            position = self.max_position
        if position < self.min_position:
            position = self.min_position
        # The car stops dead when it runs into the left boundary.
        if position == self.min_position and velocity < 0:
            velocity = 0

        done = bool(position >= self.goal_position)

        reward = 0
        if done:
            reward = 100.0
        # The penalty is computed from the raw action, not the clamped force.
        reward -= math.pow(action[0], 2) * 0.1

        self.state = np.array([position, velocity])
        # Hand out a copy (as ``_reset`` does) so that in-place edits of the
        # observation by the caller cannot corrupt the simulator state.
        return np.array(self.state), reward, done, {}

    def _reset(self):
        """Start a new episode.

        The position is drawn uniformly from ``[-0.6, -0.4)`` with
        ``self.np_random`` and the velocity is set to zero.

        Returns:
            A copy of the new ``[position, velocity]`` state.
        """
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return np.array(self.state)

    def _height(self, xs):
        """Return the track height at position(s) ``xs`` (scalar or array)."""
        return np.sin(3 * xs) * .45 + .55

    def _render(self, mode='human', close=False):
        """Draw the track, the flag and the car.

        Args:
            mode: ``'rgb_array'`` asks the viewer to return the frame as an
                array; any other value is passed on as a normal render.
            close: when true, close and drop the viewer (if any) and return
                ``None`` without drawing.

        Returns:
            Whatever ``self.viewer.render`` returns, or ``None`` when
            ``close`` is true.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        screen_width = 600
        screen_height = 400

        world_width = self.max_position - self.min_position
        # Pixels per world unit, chosen so the track spans the whole window.
        scale = screen_width / world_width
        carwidth = 40
        carheight = 20

        if self.viewer is None:
            # Imported here so that the rendering backend is only loaded
            # when something is actually drawn.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            # Track: a polyline sampled at 100 points along the hill.
            xs = np.linspace(self.min_position, self.max_position, 100)
            ys = self._height(xs)
            xys = list(zip((xs - self.min_position) * scale, ys * scale))

            self.track = rendering.make_polyline(xys)
            self.track.set_linewidth(4)
            self.viewer.add_geom(self.track)

            # Vertical offset lifting the car body and wheels above the track.
            clearance = 10

            # Car body; ``self.cartrans`` is shared with the wheels so one
            # transform update moves the whole car.
            left, right, top, bottom = -carwidth / 2, carwidth / 2, carheight, 0
            car = rendering.FilledPolygon(
                [(left, bottom), (left, top), (right, top), (right, bottom)])
            car.add_attr(rendering.Transform(translation=(0, clearance)))
            self.cartrans = rendering.Transform()
            car.add_attr(self.cartrans)
            self.viewer.add_geom(car)

            frontwheel = rendering.make_circle(carheight / 2.5)
            frontwheel.set_color(.5, .5, .5)
            frontwheel.add_attr(
                rendering.Transform(translation=(carwidth / 4, clearance)))
            frontwheel.add_attr(self.cartrans)
            self.viewer.add_geom(frontwheel)

            backwheel = rendering.make_circle(carheight / 2.5)
            backwheel.add_attr(
                rendering.Transform(translation=(-carwidth / 4, clearance)))
            backwheel.add_attr(self.cartrans)
            backwheel.set_color(.5, .5, .5)
            self.viewer.add_geom(backwheel)

            # Flag marking the goal position on the track.
            flagx = (self.goal_position - self.min_position) * scale
            flagy1 = self._height(self.goal_position) * scale
            flagy2 = flagy1 + 50
            flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
            self.viewer.add_geom(flagpole)
            flag = rendering.FilledPolygon(
                [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)])
            flag.set_color(.8, .8, 0)
            self.viewer.add_geom(flag)

        pos = self.state[0]
        self.cartrans.set_translation(
            (pos - self.min_position) * scale, self._height(pos) * scale)
        # cos(3 * pos) has the same sign as the slope of ``_height`` at
        # ``pos``, so the car tilts with the hill.
        self.cartrans.set_rotation(math.cos(3 * pos))

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters