Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import numpy as np
from numpy import sin, cos, pi
from scipy.integrate import odeint
from gym import core, spaces
from gym.utils import seeding
class MobileBaseVelEnv(core.Env):
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 15}
BASE_HEIGHT = 1.0 # [m]
LINK_MASS_BASE = 500.0 #: [kg] mass of link 1
MAX_VEL_BASE = 1
MAX_POS_BASE = 5.0
actionlimits = [np.array([-MAX_VEL_BASE]), np.array([MAX_VEL_BASE])]
def __init__(self, dt=0.01):
self.viewer = None
high = np.array(
[
self.MAX_POS_BASE,
self.MAX_VEL_BASE
],
dtype=np.float32,
)
low = -high
self.observation_space = spaces.Box(low=low, high=high, dtype=np.float64)
self.action_space = spaces.Box(
low=self.actionlimits[0], high=self.actionlimits[1], dtype=np.float64
)
self.state = None
self._dt = dt
self.seed()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def reset(self):
self.state = np.zeros(shape=(2))
return self._get_ob()
def step(self, a):
s = self.state
self.action = a
ns = self.integrate()
self.state = np.append(ns, a)
terminal = self._terminal()
reward = -1.0 if not terminal else 0.0
return (self._get_ob(), reward, terminal, {})
def _get_ob(self):
return self.state
def _terminal(self):
if self.state[0] > self.MAX_POS_BASE or self.state[0] < -self.MAX_POS_BASE:
return True
return False
def continuous_dynamics(self, x, t):
return self.action
def integrate(self):
x0 = self.state[0]
t = np.arange(0, 2 * self._dt, self._dt)
ynext = odeint(self.continuous_dynamics, x0, t)
return ynext[1]
def render(self, mode="human"):
from gym.envs.classic_control import rendering
s = self.state
if self.viewer is None:
self.viewer = rendering.Viewer(500, 500)
bound = self.MAX_POS_BASE + 1.0
self.viewer.set_bounds(-bound, bound, -bound, bound)
if s is None:
return None
p0 = [s[0], 0.5 * self.BASE_HEIGHT]
p1 = [p0[0], p0[1] + 0.5 * self.BASE_HEIGHT]
p = [p0, p1]
thetas = [0.0, 0.0]
tf0 = rendering.Transform(rotation=thetas[0], translation=p0)
tf1 = rendering.Transform(rotation=thetas[1], translation=p1)
tf = [tf0, tf1]
self.viewer.draw_line((-5.5, 0), (5.5, 0))
l, r, t, b = -0.5, 0.5, 0.5, -0.5
link = self.viewer.draw_polygon([(l, b), (l, t), (r, t), (r, b)])
link.set_color(0, 0.8, 0.8)
link.add_attr(tf[0])
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None