Initial commit, features simple reachers

8b5402b7 · Max Spahn · 8b5402b7 · 8b5402b7 · 8b5402b7 · 8b5402b7
Commit 8b5402b7 authored 3 years ago by Max Spahn
--- a/.gitignore
+++ b/.gitignore
+planarEnvs.egg-info/
--- a/nLinkReacher/.gitignore
+++ b/nLinkReacher/.gitignore
+__pycache__/
--- a/nLinkReacher/__init__.py
+++ b/nLinkReacher/__init__.py
+from gym.envs.registration import register
+register(
+    id='nLink-reacher-acc-v0',
+    entry_point='nLinkReacher.envs:NLinkAccReacherEnv'
+)
+register(
+    id='nLink-reacher-vel-v0',
+    entry_point='nLinkReacher.envs:NLinkVelReacherEnv'
+)
+register(
+    id='nLink-reacher-tor-v0',
+    entry_point='nLinkReacher.envs:NLinkTorReacherEnv'
+)
--- a/nLinkReacher/envs/.gitignore
+++ b/nLinkReacher/envs/.gitignore
+__pycache__/
--- a/nLinkReacher/envs/__init__.py
+++ b/nLinkReacher/envs/__init__.py
+from nLinkReacher.envs.acc import NLinkAccReacherEnv
+from nLinkReacher.envs.vel import NLinkVelReacherEnv
+from nLinkReacher.envs.tor import NLinkTorReacherEnv
--- a/nLinkReacher/envs/acc.py
+++ b/nLinkReacher/envs/acc.py
+import numpy as np
+from numpy import sin, cos, pi
+
+from scipy.integrate import odeint
+
+from gym import core, spaces
+from gym.utils import seeding
+
+
+class NLinkAccReacherEnv(core.Env):
+
+    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 15}
+
+    LINK_LENGTH = 1.0  # [m]
+
+    MAX_VEL = 4 * pi
+    MAX_POS = pi
+    MAX_ACC = 9 * pi
+
+    def __init__(self, n=2, dt=0.01):
+        self._n = n
+        self.viewer = None
+        limUpPos = [self.MAX_POS for i in range(n)]
+        limUpVel = [self.MAX_VEL for i in range(n)]
+        limUpAcc = [self.MAX_ACC for i in range(n)]
+        high = np.array( limUpPos + limUpVel + limUpAcc, dtype=np.float32)
+        low = -high
+        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float64)
+        self.action_space = spaces.Box(
+            low=-np.array(limUpAcc), high=np.array(limUpAcc), dtype=np.float64
+        )
+        self.state = None
+        self.seed()
+        self._dt = dt
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def reset(self):
+        pos = [0.1 for i in range(self._n)]
+        vel = [0.1 for i in range(self._n)]
+        self.state = np.array(pos + vel)
+        return self._get_ob()
+
+    def step(self, a):
+        s = self.state
+        self.action = a
+        ns = self.integrate()
+        acc = self.continuous_dynamics(self.state[0:self._n * 2], 0.1)
+        self.state = np.concatenate((ns, acc[0:self._n]))
+        terminal = self._terminal()
+        reward = -1.0 if not terminal else 0.0
+        return (self._get_ob(), reward, terminal, {})
+
+    def _get_ob(self):
+        return self.state
+
+    def _terminal(self):
+        s = self.state
+        return False
+
+    def continuous_dynamics(self, x, t):
+        u = self.action
+        vel = np.array(x[self._n : self._n * 2])
+        acc = np.concatenate((vel, u))
+        return acc
+
+    def integrate(self):
+        x0 = self.state[0:2 * self._n]
+        t = np.arange(0, 2 * self._dt, self._dt)
+        ynext = odeint(self.continuous_dynamics, x0, t)
+        return ynext[1]
+
+    def forwardKinematics(self, lastLinkIndex):
+        fk = np.array([0.0, 0.2, 0.0])
+        for i in range(lastLinkIndex):
+            angle = 0.0
+            for j in range(i+1):
+                angle += self.state[j]
+            fk[0] += np.cos(angle) * self.LINK_LENGTH
+            fk[1] += np.sin(angle) * self.LINK_LENGTH
+            fk[2] += self.state[i]
+        fk[2] += self.state[lastLinkIndex]
+        return fk
+
+    def render(self, mode="human"):
+        from gym.envs.classic_control import rendering
+
+        s = self.state
+
+        bound = self.LINK_LENGTH * self._n + 0.2
+        if self.viewer is None:
+            self.viewer = rendering.Viewer(500, 500)
+            self.viewer.set_bounds(-bound, bound, -bound, bound)
+
+        if s is None:
+            return None
+
+        self.viewer.draw_line((-bound, 0), (bound, 0))
+        base = self.viewer.draw_polygon([(-0.2,0), (0.0,0.2), (0.2,0), (-0.2,0)])
+        baseJoint = self.viewer.draw_circle(.10)
+        baseJoint.set_color(.8, .8, 0)
+        tf0 = rendering.Transform(rotation=0, translation=(0.0, 0.2))
+        baseJoint.add_attr(tf0)
+        l,r,t,b = 0, self.LINK_LENGTH, .01, -.01
+        for i in range(self._n):
+            fk = self.forwardKinematics(i)
+            tf = rendering.Transform(rotation=fk[2], translation=fk[0:2])
+            link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
+            link.set_color(0,.8, .8)
+            link.add_attr(tf)
+            joint = self.viewer.draw_circle(.10)
+            joint.set_color(.8, .8, 0)
+            joint.add_attr(tf)
+        fk = self.forwardKinematics(self._n)
+        tf = rendering.Transform(rotation=fk[2], translation=fk[0:2])
+        eejoint = self.viewer.draw_circle(.10)
+        eejoint.set_color(.8, .8, 0)
+        eejoint.add_attr(tf)
+
+
+        return self.viewer.render(return_rgb_array=mode == "rgb_array")
+
+    def close(self):
+        if self.viewer:
+            self.viewer.close()
+            self.viewer = None
--- a/nLinkReacher/envs/tor.py
+++ b/nLinkReacher/envs/tor.py
+import numpy as np
+from numpy import sin, cos, pi
+
+from scipy.integrate import odeint
+
+from gym import core, spaces
+from gym.utils import seeding
+
+from nLinkReacher.resources.createDynamics import createDynamics
+
+
+class NLinkTorReacherEnv(core.Env):
+
+    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 15}
+
+    LINK_LENGTH = 1.0  # [m]
+    LINK_MASS = 1.0
+
+    MAX_VEL = 4 * pi
+    MAX_POS = pi
+    MAX_ACC = 9 * pi
+    MAX_TOR = 1000
+
+    def __init__(self, n=2, dt=0.01, k=0.0):
+        self._n = n
+        self._k = k
+        self.viewer = None
+        self.dynamics_fun = createDynamics(n)
+        limUpPos = [self.MAX_POS for i in range(n)]
+        limUpVel = [self.MAX_VEL for i in range(n)]
+        limUpAcc = [self.MAX_ACC for i in range(n)]
+        limUpTor = [self.MAX_TOR for i in range(n)]
+        high = np.array( limUpPos + limUpVel + limUpAcc, dtype=np.float32)
+        low = -high
+        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float64)
+        self.action_space = spaces.Box(
+            low=-np.array(limUpTor), high=np.array(limUpTor), dtype=np.float64
+        )
+        self.state = None
+        self.seed()
+        self._dt = dt
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def reset(self):
+        pos = [0 for i in range(self._n)]
+        vel = [0 for i in range(self._n)]
+        acc = [0 for i in range(self._n)]
+        self.state = np.array(pos + vel + acc)
+        return self._get_ob()
+
+    def step(self, a):
+        s = self.state
+        self.action = a
+        ns = self.integrate()
+        acc = self.continuous_dynamics(self.state[0:self._n * 2], 0.1)
+        self.state = np.concatenate((ns, acc[0:self._n]))
+        terminal = self._terminal()
+        reward = -1.0 if not terminal else 0.0
+        return (self._get_ob(), reward, terminal, {})
+
+    def _get_ob(self):
+        return self.state
+
+    def _terminal(self):
+        s = self.state
+        return False
+
+    def continuous_dynamics(self, x, t):
+        q = x[0:self._n]
+        qdot = x[self._n: self._n * 2]
+        l = [self.LINK_LENGTH for i in range(self._n)]
+        m = [self.LINK_MASS for i in range(self._n)]
+        g = 10.0
+        tau = self.action
+        acc = np.array(self.dynamics_fun(q, qdot, l, m, g, self._k, tau))[:, 0]
+        return acc
+
+    def integrate(self):
+        x0 = self.state[0:2 * self._n]
+        t = np.arange(0, 2 * self._dt, self._dt)
+        ynext = odeint(self.continuous_dynamics, x0, t)
+        return ynext[1]
+
+    def forwardKinematics(self, lastLinkIndex):
+        fk = np.array([0.0, 0.2, 0.0])
+        for i in range(lastLinkIndex):
+            angle = 0.0
+            for j in range(i+1):
+                angle += self.state[j]
+            fk[0] += np.cos(angle) * self.LINK_LENGTH
+            fk[1] += np.sin(angle) * self.LINK_LENGTH
+            fk[2] += self.state[i]
+        fk[2] += self.state[lastLinkIndex]
+        return fk
+
+    def render(self, mode="human"):
+        from gym.envs.classic_control import rendering
+
+        s = self.state
+
+        bound = self.LINK_LENGTH * self._n + 0.2
+        if self.viewer is None:
+            self.viewer = rendering.Viewer(500, 500)
+            self.viewer.set_bounds(-bound, bound, -bound, bound)
+
+        if s is None:
+            return None
+
+        self.viewer.draw_line((-bound, 0), (bound, 0))
+        base = self.viewer.draw_polygon([(-0.2,0), (0.0,0.2), (0.2,0), (-0.2,0)])
+        baseJoint = self.viewer.draw_circle(.10)
+        baseJoint.set_color(.8, .8, 0)
+        tf0 = rendering.Transform(rotation=0, translation=(0.0, 0.2))
+        baseJoint.add_attr(tf0)
+        l,r,t,b = 0, self.LINK_LENGTH, .01, -.01
+        for i in range(self._n):
+            fk = self.forwardKinematics(i)
+            tf = rendering.Transform(rotation=fk[2], translation=fk[0:2])
+            link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
+            link.set_color(0,.8, .8)
+            link.add_attr(tf)
+            joint = self.viewer.draw_circle(.10)
+            joint.set_color(.8, .8, 0)
+            joint.add_attr(tf)
+        fk = self.forwardKinematics(self._n)
+        tf = rendering.Transform(rotation=fk[2], translation=fk[0:2])
+        eejoint = self.viewer.draw_circle(.10)
+        eejoint.set_color(.8, .8, 0)
+        eejoint.add_attr(tf)
+
+
+        return self.viewer.render(return_rgb_array=mode == "rgb_array")
+
+    def close(self):
+        if self.viewer:
+            self.viewer.close()
+            self.viewer = None
--- a/nLinkReacher/envs/vel.py
+++ b/nLinkReacher/envs/vel.py
+import numpy as np
+from numpy import sin, cos, pi
+
+from scipy.integrate import odeint
+
+from gym import core, spaces
+from gym.utils import seeding
+
+
+class NLinkVelReacherEnv(core.Env):
+
+    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 15}
+
+    LINK_LENGTH = 1.0  # [m]
+
+    MAX_VEL = 4 * pi
+    MAX_POS = pi
+
+    def __init__(self, n=2, dt=0.01):
+        self._n = n
+        self.viewer = None
+        limUpPos = [self.MAX_POS for i in range(n)]
+        limUpVel = [self.MAX_VEL for i in range(n)]
+        high = np.array( limUpPos + limUpVel, dtype=np.float32)
+        low = -high
+        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float64)
+        self.action_space = spaces.Box(
+            low=-np.array(limUpVel), high=np.array(limUpVel), dtype=np.float64
+        )
+        self.state = None
+        self.seed()
+        self._dt = dt
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def reset(self):
+        pos = [0.1 for i in range(self._n)]
+        vel = [0.1 for i in range(self._n)]
+        self.state = np.array(pos + vel)
+        return self._get_ob()
+
+    def step(self, a):
+        s = self.state
+        self.action = a
+        ns = self.integrate()
+        vel = a
+        self.state = np.concatenate((ns, vel))
+        terminal = self._terminal()
+        reward = -1.0 if not terminal else 0.0
+        return (self._get_ob(), reward, terminal, {})
+
+    def _get_ob(self):
+        return self.state
+
+    def _terminal(self):
+        s = self.state
+        return False
+
+    def continuous_dynamics(self, x, t):
+        vel = self.action
+        return vel
+
+    def integrate(self):
+        x0 = self.state[0:self._n]
+        t = np.arange(0, 2 * self._dt, self._dt)
+        ynext = odeint(self.continuous_dynamics, x0, t)
+        return ynext[1]
+
+    def forwardKinematics(self, lastLinkIndex):
+        fk = np.array([0.0, 0.2, 0.0])
+        for i in range(lastLinkIndex):
+            angle = 0.0
+            for j in range(i+1):
+                angle += self.state[j]
+            fk[0] += np.cos(angle) * self.LINK_LENGTH
+            fk[1] += np.sin(angle) * self.LINK_LENGTH
+            fk[2] += self.state[i]
+        fk[2] += self.state[lastLinkIndex]
+        return fk
+
+    def render(self, mode="human"):
+        from gym.envs.classic_control import rendering
+
+        s = self.state
+
+        bound = self.LINK_LENGTH * self._n + 0.2
+        if self.viewer is None:
+            self.viewer = rendering.Viewer(500, 500)
+            self.viewer.set_bounds(-bound, bound, -bound, bound)
+
+        if s is None:
+            return None
+
+        self.viewer.draw_line((-bound, 0), (bound, 0))
+        base = self.viewer.draw_polygon([(-0.2,0), (0.0,0.2), (0.2,0), (-0.2,0)])
+        baseJoint = self.viewer.draw_circle(.10)
+        baseJoint.set_color(.8, .8, 0)
+        tf0 = rendering.Transform(rotation=0, translation=(0.0, 0.2))
+        baseJoint.add_attr(tf0)
+        l,r,t,b = 0, self.LINK_LENGTH, .01, -.01
+        for i in range(self._n):
+            fk = self.forwardKinematics(i)
+            tf = rendering.Transform(rotation=fk[2], translation=fk[0:2])
+            link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
+            link.set_color(0,.8, .8)
+            link.add_attr(tf)
+            joint = self.viewer.draw_circle(.10)
+            joint.set_color(.8, .8, 0)
+            joint.add_attr(tf)
+        fk = self.forwardKinematics(self._n)
+        tf = rendering.Transform(rotation=fk[2], translation=fk[0:2])
+        eejoint = self.viewer.draw_circle(.10)
+        eejoint.set_color(.8, .8, 0)
+        eejoint.add_attr(tf)
+
+
+        return self.viewer.render(return_rgb_array=mode == "rgb_array")
+
+    def close(self):
+        if self.viewer:
+            self.viewer.close()
+            self.viewer = None
--- a/nLinkReacher/resources/createDynamics.py
+++ b/nLinkReacher/resources/createDynamics.py
+import casadi as ca
+from casadi import cos, sin
+import numpy as np
+from sys import argv
+
+
+# create variables
+def main():
+    #n = int(argv[1])
+    n = 20
+    print("Creating dynamics for pendulum with " + str(n) + " joints, point masses")
+    name = '../output/dynamics_' + str(n) + "_Reacher.casadi"
+    dynamics = createDynamics(n)
+    dynamics.save(name)
+
+def createDynamics(n):
+    q = ca.SX.sym('q', n)
+    qdot = ca.SX.sym('qdot', n)
+    tau = ca.SX.sym('tau', n)
+    l = ca.SX.sym('l', n)
+    m = ca.SX.sym('m', n)
+    k = ca.SX.sym('k', 1)
+    g = ca.SX.sym('g', 1)
+    x = ca.SX.sym('x', (n, 2))
+    v = ca.SX.sym('v', (n, 2))
+    v_2 = ca.SX.sym('v_2', n)
+
+    # velocities and positions
+    v[0, 0] = -1 * sin(q[0]) * l[0] * qdot[0]
+    v[0, 1] = cos(q[0]) * l[0] * qdot[0]
+    x[0, 0] = cos(q[0]) * l[0]
+    x[0, 1] = sin(q[0]) * l[0]
+
+    for i in range(1, n):
+        angle = 0.0
+        for j in range(i+1):
+            angle += q[j]
+        v[i, 0] = v[i-1, 0] - sin(angle) * l[i] * qdot[i]
+        v[i, 1] = v[i-1, 1] + cos(angle) * l[i] * qdot[i]
+        x[i, 0] = x[i-1, 0] + cos(angle) * l[i]
+        x[i, 1] = x[i-1, 1] + sin(angle) * l[i]
+
+    for i in range(n):
+        v_2[i] = ca.norm_2(v[i, :]) ** 2
+
+
+
+    # kinetic energy
+    T = sum([0.5 * m[i] * v_2[i] for i in range(n)])
+
+    # potential energy
+    V = sum([g * m[i] * x[i, 1] for i in range(n)])
+
+    # lagrangian
+    L = T - V
+
+    # derivatives
+    dL_dq = ca.gradient(L, q)
+    dL_dqdot = ca.gradient(L, qdot)
+
+    d2L_dq2 = ca.jacobian(dL_dq, q)
+    d2L_dqdqdot = ca.jacobian(dL_dq, qdot)
+    d2L_dqdot2 = ca.jacobian(dL_dqdot, qdot)
+
+
+    K = np.identity(n) * k
+    # equation of motion 
+    # M * q_ddot + F q_dot - f = tau
+    # augmented for first order system
+    tau_aug = ca.vertcat(np.zeros((n, 1)), tau)
+    f_aug = ca.vertcat(np.zeros((n, 1)), dL_dq)
+    F_aug = ca.horzcat(np.zeros((2*n, n)), ca.vertcat(-1*np.identity(n), d2L_dqdqdot))
+    K_aug = ca.horzcat(np.zeros((2*n, n)), ca.vertcat(0*np.identity(n), K))
+    M_aug = ca.vertcat(ca.horzcat(np.identity(n), np.zeros((n, n))), ca.horzcat(np.zeros((n, n)), d2L_dqdot2))
+
+    x = ca.vertcat(q, qdot)
+
+    rhs = tau_aug + f_aug - ca.mtimes(F_aug, x) - ca.mtimes(K_aug, x)
+    xdot = ca.solve(M_aug, rhs)
+
+    # save system
+    dynamics = ca.Function("dynamics", [q, qdot, l, m, g, k, tau], [xdot])
+    return dynamics
+
+if __name__ == "__main__":
+    main()
--- a/setup.py
+++ b/setup.py
+from setuptools import setup
+
+setup(
+    name="planarEnvs",
+    version='0.0.1',
+    install_requires=['gym',
+                      'numpy']
+)