#!/usr/bin/env python
# coding: utf-8

# # Learning a sensorimotor model with a context provided by the environment
# 
# In this notebook, we will see how to use the Explauto library to learn and control actions that depend on a context provided by the environment. We assume that the reader is familiar with the main components of the Explauto library explained in another notebook ([full tutorial](http://nbviewer.ipython.org/github/flowersteam/explauto/blob/master/notebook/full_tutorial.ipynb)): the environment, the sensorimotor model and the interest model.
# 
# Another [tutorial](http://nbviewer.ipython.org/github/flowersteam/explauto/blob/master/notebook/learning_with_sensorimotor_context.ipynb) describes how to define local actions that depend on the previous motor and sensory positions.

# In[1]:

get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# We first define an environment where the sensory feedback depends on both the motor command and the environment's context.
# We define a Half Lazy Arm: a 2D robotic arm with 3 joints (based on Explauto's SimpleArmEnvironment), with the specificity that it behaves normally only one time out of two, and is lazy (it moves much less than commanded) the other times.

# In[57]:

from explauto.environment.simple_arm import SimpleArmEnvironment
from explauto.environment import environments


class HalfLazyArm(SimpleArmEnvironment):
    def __init__(self, **kwargs):
        SimpleArmEnvironment.__init__(self, **kwargs)
        self.current_context = [0]  # 0: lazy, 1: normal

    def compute_motor_command(self, m):
        if self.current_context[0]:
            return m
        else:
            # Lazy context: the arm only does a quarter of the commanded movement
            return [mi / 4. for mi in m]

    def compute_sensori_effect(self, m):
        s = SimpleArmEnvironment.compute_sensori_effect(self, m)
        # Alternate between the lazy and normal contexts at each iteration
        self.current_context[0] = 1 - self.current_context[0]
        return s


env_cls = HalfLazyArm
env_conf = environments['simple_arm'][1]['low_dimensional']


# Here, the context is hidden in self.current_context and is not available to the learning agent. We thus use the "ContextEnvironment" class to translate this HalfLazyArm environment into an environment that provides the context as part of the sensory feedback. The sensory feedback is now the concatenation of the context $c$ and the feedback of the arm $s$.
# We thus call this mode "mcs".
# 
# Additional information has to be provided to build this new environment: "reset_iterations" gives the number of iterations before the environment is reset, "context_n_dims" gives the number of dimensions of the context, and "context_sensory_bounds" specifies the min and max bounds on those dimensions.

# In[58]:

from explauto.environment.context_environment import ContextEnvironment

context_mode = dict(mode='mcs',
                    reset_iterations=10,
                    context_n_dims=1,
                    context_sensory_bounds=[[0], [1]])

environment = ContextEnvironment(env_cls, env_conf, context_mode)


# In[59]:

# Create the axes for plotting:
get_ipython().run_line_magic('pylab', 'inline')
ax = axes()

for m in environment.random_motors(n=10):
    s = environment.update(m, reset=False)
    environment.plot(ax)


# Here we detail the learning loop, although it is also entirely encapsulated in the "Experiment" class (see below).
# 
# Note that to draw a goal given a context, the interest model has to be 'RandomInterest' or 'DiscretizedProgress'.
# 
# The RandomInterest model will just draw a random goal in the dimensions of $s$ (not $c$).
# 
# The DiscretizedProgress model will draw a goal in the $s$ region where progress is maximal, computed on the points whose context was similar to $c$.
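# Before running the loop below, here is a quick sketch (our addition, reusing only the API calls that appear in this notebook) illustrating that the RandomInterest model indeed ignores the context value: the same model instance draws goals in the $s$ dimensions whatever the value of $c$.

# In[ ]:

from explauto.interest_model.random import RandomInterest

# Sample one goal for each possible context value; the distribution of goals
# is the same in both cases since RandomInterest samples uniformly in s.
im = RandomInterest(environment.conf, environment.conf.s_dims)
for c in ([0], [1]):  # lazy and normal contexts
    goal = im.sample_given_context(c, range(context_mode["context_n_dims"]))
    print("context", c, "-> sampled goal", list(goal))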
# In[60]:

ax = axes()

# Random Goal Babbling
from explauto.interest_model.random import RandomInterest
im_model = RandomInterest(environment.conf, environment.conf.s_dims)

# Reset the environment
environment.reset()

# Reset the sensorimotor model
from explauto.sensorimotor_model.sensorimotor_model import SensorimotorModel
sm_model = SensorimotorModel.from_configuration(environment.conf, 'nearest_neighbor', 'default')

# Add points to bootstrap the sensorimotor model
for i in range(10):
    sm_model.update([0.] * 3, [0.] * 3)

# Dimensions of the model: motor m = dims 0-2, sensory (context + arm feedback) = dims 3-5
in_dims = range(3, 6)
out_dims = range(3)

for i in range(1000):
    if np.mod(i, context_mode['reset_iterations']) == 0:
        environment.reset()

    # Get the context from the environment
    context = environment.get_current_context()

    # Draw a goal given this context
    s_g = list(im_model.sample_given_context(context, range(context_mode["context_n_dims"])))

    # Infer a motor command to reach this goal given the context
    m = sm_model.infer(in_dims, out_dims, context + s_g)

    s = environment.update(m, reset=False)

    # Update the sensorimotor model
    sm_model.update(m, s)

    # Update the interest model
    im_model.update(np.hstack((m, context, s_g)), np.hstack((m, s)))

    # Plot the arm
    environment.plot(ax, alpha=0.1)
    # print("m", m, "s", s, "s_g", s_g)


# Now, let's give a goal to the agent together with the current context of the environment, and see if it manages to take the context (lazy or normal) into account to choose the right motor command.
# 
# The goal is [0.95, 0.3], and here the context is normal:

# In[62]:

# Inverse prediction with the sensory context
ax = axes()
s_g = [0.95, 0.3]

sm_model.mode = "exploit"
context = environment.get_current_context()
print("Context:", "normal" if context[0] else "lazy")

m = sm_model.infer(in_dims, out_dims, context + s_g)
print("Inferred m:", m)

s = environment.update(m, reset=False)
ax.plot(s_g[0], s_g[1], marker='o', color='red')
environment.plot(ax, alpha=0.1)
# Note: the reached s contains the context dimension followed by the arm feedback
print("Goal s =", s_g, "Reached s =", s)


# The same goal in the lazy context:

# In[63]:

# Inverse prediction with the sensory context
ax = axes()
s_g = [0.95, 0.3]

sm_model.mode = "exploit"
context = environment.get_current_context()
print("Context:", "normal" if context[0] else "lazy")

m = sm_model.infer(in_dims, out_dims, context + s_g)
print("Inferred m:", m)

s = environment.update(m, reset=False)
ax.plot(s_g[0], s_g[1], marker='o', color='red')
environment.plot(ax, alpha=0.1)
print("Goal s =", s_g, "Reached s =", s)
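# To quantify the effect of the context, here is a small sketch (our addition, reusing only the calls shown above) that measures the Euclidean reaching error for the same goal under both contexts; since the contexts alternate at each update, two consecutive trials cover both. Slicing off the first context_n_dims entries of the feedback to recover the arm position follows the "mcs" layout described above (context first, then arm feedback).

# In[ ]:

sm_model.mode = "exploit"
s_g = [0.95, 0.3]

for trial in range(2):  # the contexts alternate, so two trials cover lazy and normal
    context = environment.get_current_context()
    m = sm_model.infer(in_dims, out_dims, context + s_g)
    s = environment.update(m, reset=False)
    # Drop the context dimensions to compare the arm feedback with the goal
    reached = list(s)[context_mode['context_n_dims']:]
    error = np.linalg.norm(np.array(reached) - np.array(s_g))
    print("Context:", "normal" if context[0] else "lazy", "| reaching error:", error)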
# ## Using "Experiment"
# 
# Here, the previous loop is entirely encapsulated in the "Experiment" class.

# In[65]:

import numpy as np

from explauto import Agent
from explauto import Experiment
from explauto.utils import rand_bounds
from explauto.interest_model.interest_model import InterestModel

get_ipython().run_line_magic('pylab', 'inline')

context_mode = dict(mode='mcs',
                    rest_position=[0] * 3,
                    reset_iterations=10,
                    context_n_dims=1,
                    context_sensory_bounds=[[0], [1]])

sm_model = SensorimotorModel.from_configuration(environment.conf, 'nearest_neighbor', 'default')
im_model = InterestModel.from_configuration(environment.conf, environment.conf.s_dims, 'random')

agent = Agent(environment.conf, sm_model, im_model, context_mode=context_mode)
expe = Experiment(environment, agent, context_mode)

# Evaluate the reaching error at the given iterations, on 200 goals drawn
# uniformly in the region x in [0.97, 1.], y in [-0.3, 0.3]
expe.evaluate_at([100, 500, 1000, 2000, 5000, 10000],
                 rand_bounds(np.vstack(([0.97, -0.3], [1., 0.3])), n=200))
expe.run()

ax = axes()
expe.log.plot_learning_curve(ax)
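# The beginning of this notebook mentioned that the DiscretizedProgress model can also draw goals given a context. As a final sketch (our addition; we assume the 'discretized_progress' configuration key is available for this environment, as in other Explauto tutorials), the same experiment can be re-run with progress-based goal selection, and the resulting learning curve compared to the random one above:

# In[ ]:

# Same setup as above, but goals are drawn in the s region of maximal
# learning progress for contexts similar to the current one.
sm_model = SensorimotorModel.from_configuration(environment.conf, 'nearest_neighbor', 'default')
im_model = InterestModel.from_configuration(environment.conf, environment.conf.s_dims, 'discretized_progress')

agent = Agent(environment.conf, sm_model, im_model, context_mode=context_mode)
expe = Experiment(environment, agent, context_mode)

expe.evaluate_at([100, 500, 1000, 2000, 5000, 10000],
                 rand_bounds(np.vstack(([0.97, -0.3], [1., 0.3])), n=200))
expe.run()

ax = axes()
expe.log.plot_learning_curve(ax)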