Source code for evolearn.environments.environment_simple


#######################################################
#   ____  _      ___   _     ____   __    ___   _     #
#  | |_  \ \  / / / \ | |   | |_   / /\  | |_) | |\ | #
#  |_|__  \_\/  \_\_/ |_|__ |_|__ /_/--\ |_| \ |_| \| #
#                                                     #
#                  Chad Carlson - 2017                #
#######################################################


from evolearn.controllers.controller_simple import SimpleAgent

import numpy as np


class SimpleEnvironment:
    """ Simple wrapped callable nutrient environment.

    Todo:
        * Allow for the import of a txt file for defining maze/track boundaries.
        * Connect imported boundaries to the evaluation loop's break/collision flag.
    """

    def __init__(self):

        # -------------------- ENVIRONMENT --------------------

        # Environment parameters
        self.world_size = 100
        self.world = np.zeros((self.world_size, self.world_size))
        self.walls = False
        self.nutrient_rolling_update = False

        # Reward parameters
        self.variable_nutrients = False
        self.nutrient_density = .25
        self.metabolic_cost = -0.2
        self.nutrient_relative_to_cost = 3
        self.nutrient_value = self.nutrient_relative_to_cost * -1 * self.metabolic_cost

        # -------------------- AGENT --------------------

        # Agent parameters
        self.observation_space = 9
        self.action_space = 5

        # Possible actions in environment
        self.actions = self.build_actions()

        # Define Agent object
        self.agent = SimpleAgent(self.world_size)
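    # Worked note on the defaults above (added for clarity): nutrient_value is defined
    # relative to the per-step metabolic cost, so with metabolic_cost = -0.2 and
    # nutrient_relative_to_cost = 3, each nutrient cell is worth 3 * -1 * (-0.2) = 0.6,
    # i.e. three steps' worth of metabolic cost.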
    def build_actions(self):
        """ Builds an accessible dictionary of possible actions, looked up on each agent
        action to provide the corresponding position and heading adjustments.

        :return: environment action dict. Indices define position and heading adjustments
            for a selected action.
        """

        # Position adjustments
        empty_position_adjust = [[0, 0], [0, 0], [0, 0], [0, 0]]

        # Position adjustments are different depending on if there is a rolling nutrient update or not
        if self.nutrient_rolling_update:

            # Namely, when the environment 'rolls', the agent can adjust its row, but never its column
            # Is this necessary? Couldn't I just have the agent continuously move across the field without
            # changing its heading? If not, then this isn't exactly right, since it would also require the
            # generation of new columns that fit the originally specified nutrient density
            straight = [[-1, 0], [0, 0], [1, 0], [0, 0]]
            diagonal_right = [[-1, 0], [1, 0], [1, 0], [-1, 0]]
            diagonal_left = [[-1, 0], [-1, 0], [1, 0], [1, 0]]

        else:

            straight = [[-1, 0], [0, 1], [1, 0], [0, -1]]
            diagonal_right = [[-1, 1], [1, 1], [1, -1], [-1, -1]]
            diagonal_left = [[-1, -1], [-1, 1], [1, 1], [1, -1]]

        # Build possible actions with position and heading changes
        actions = {
            0: {'heading_adjust': 0, 'position_adjust': straight},
            1: {'heading_adjust': 0, 'position_adjust': diagonal_right},
            2: {'heading_adjust': 0, 'position_adjust': diagonal_left},
            3: {'heading_adjust': -1, 'position_adjust': empty_position_adjust},
            4: {'heading_adjust': 1, 'position_adjust': empty_position_adjust}
        }

        return actions
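    # Illustration (an assumption, since move_agent() below is still a stub): each
    # position_adjust list holds four [row, col] entries, which appear to be indexed by
    # the agent's current heading (0-3). For example, in the non-rolling case a
    # 'straight' action under heading 1 would shift the agent by
    #
    #     self.actions[0]['position_adjust'][1]   # -> [0, 1]
    #
    # while actions 3 and 4 rotate the heading by -1/+1 and leave the position unchanged.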
    def collision_check(self):
        """ Collision check to potentially break current agent's evaluation.

        :return: collide Boolean
        """

        collide = False

        if self.walls:
            if self.world[self.agent.location[0], self.agent.location[1]] > self.nutrient_value:
                collide = True

        return collide
    def initialize_environment(self):
        """ Initialize environment.

        :return: initialized world
        """

        # Define a world with a certain nutrient density
        world = np.random.rand(self.world_size, self.world_size)
        world[world > self.nutrient_density] = 0

        # Include positive rewards
        # Accommodate for variable nutrient values
        if self.variable_nutrients:
            world = self.nutrient_value * (world / self.nutrient_density)  # normalize for density (and maximum values)
        else:
            world[world > 0] = self.nutrient_value

        # Include negative rewards
        world[world == 0] = self.metabolic_cost

        return world
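    # Worked example (added for clarity, using the defaults above): np.random.rand draws
    # uniform values in [0, 1), so zeroing everything above nutrient_density = 0.25 leaves
    # roughly 25% of cells as nutrients. With variable_nutrients = False those cells are
    # set to nutrient_value (0.6) and all remaining cells to metabolic_cost (-0.2); with
    # variable_nutrients = True the surviving cells instead scale linearly up to 0.6.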
    def make_observation(self):  #######################
        """ Making an observation for a single step through environment.
        """

        return 0
    def move_agent(self, action):  #######################
        """ Update agent location based on selected action.
        """

        pass
    def reformat_action(self, agent_output):
        """ Reformat raw network output into an environment-specific (or experiment-specified)
        action/class choice.

        :return: reformatted action/class index
        """

        action = agent_output.index(max(agent_output))

        return action
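    # Example (added for clarity): reformat_action([0.1, 0.7, 0.2, 0.0, 0.0]) returns 1,
    # the index of the largest output. Note that list.index() requires agent_output to be
    # a plain Python list; a NumPy array would need e.g. int(np.argmax(agent_output)).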
    def reset(self):
        """ Complete environment reset.

        :return: initial environment observation
        """

        # Re-initialize world
        self.world = self.initialize_environment()

        # Initialize your agent
        self.agent.reset()

        # Return an initial observation at the agent's current location
        init_observation = self.make_observation()

        return init_observation
    def return_reward(self):
        """ Returns reward for agent's current location.

        :return: reward/state at agent.location
        """

        return self.world[self.agent.location[0], self.agent.location[1]]
    def step(self, action):
        """ Making a single step through the environment.

        :return: next observation, current reward, collision Boolean.
        """

        self.update(action)

        observation = self.make_observation()
        state = self.return_reward()
        collide = self.collision_check()

        return observation, state, collide
    def update(self, action):
        """ Update environment.world with respect to possibly consumed nutrients at the
        agent's current location.
        """

        self.world[self.agent.location[0], self.agent.location[1]] = self.metabolic_cost
class Recognition:
    """ General image recognition object.
    """

    def __init__(self):
        pass
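# Minimal usage sketch (added for illustration; not part of the original module). It
# drives the environment with random actions and assumes SimpleAgent.reset() initializes
# agent.location. Since make_observation() and move_agent() above are still stubs, the
# loop only exercises the reward and collision bookkeeping.
if __name__ == '__main__':

    env = SimpleEnvironment()
    observation = env.reset()

    total_reward = 0.0
    for _ in range(10):
        action = np.random.randint(env.action_space)
        observation, reward, collide = env.step(action)
        total_reward += reward
        if collide:
            break

    print('Total reward:', total_reward)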