Reinforcement Learning Dyna-Q Planning Environment

This is the Python source code of planning_env.py for the post "Reinforcement Learning Example for Planning Tasks Using Q Learning and Dyna-Q".

"""
Reinforcement learning example.

This script is the environment part of this example. The RL logic is in RL_brain.py.

"""


import numpy as np
# Fix NumPy's global random seed so runs are repeatable.
np.random.seed(1)

# Action labels. Maze stores this list and its length, and treats an
# action's index as the index of the goal counter to advance.
ACTIONS = ['ML', 'RL']

# Number of steps after which Maze.step reports the episode as done.
Number_of_steps = 5
# Number of goals, i.e. number of per-goal progress counters.
Number_of_goals = 2

# Per-goal progress counters used as Maze's default starting position.
Curr_goals_position = [0, 0]

# NOTE(review): not referenced anywhere in the visible code; presumably
# maps actions (rows) to goals -- confirm against RL_brain.py.
ACTION_Goal_Matrix = np.array([[0, 0], [1, 1]])

# One row per goal: [counter value needed to complete the goal,
#                    reward granted for completing it].
Goal_completion_criteria_and_rewards = [[2, 3], [3, 10]]

class Maze(object):
    """Step-limited environment that keeps one progress counter per goal.

    Taking action ``i`` advances counter ``i`` by one. The episode is done
    once ``Number_of_steps`` steps have been taken; at that point every goal
    whose counter reached its threshold in
    ``Goal_completion_criteria_and_rewards`` adds its reward.

    Attributes:
        action_space: private copy of the action labels.
        n_actions: number of available actions.
        position: list of per-goal counters (the environment state).
        position_number: steps taken since construction or the last reset.
        reward: reward accumulated since construction or the last reset.
    """

    def __init__(self, actions=None, init_pos=None):
        """Create the environment.

        Args:
            actions: sequence of action labels; ``None`` (the default) uses
                the module-level ``ACTIONS``.
            init_pos: starting per-goal counters; ``None`` (the default)
                uses the module-level ``Curr_goals_position``.
        """
        super(Maze, self).__init__()
        if actions is None:
            actions = ACTIONS
        if init_pos is None:
            init_pos = Curr_goals_position

        # Copy both sequences: step() and reset() mutate the counters in
        # place, and without a copy every instance would share (and
        # corrupt) the module-level defaults or the caller's own list.
        self.action_space = list(actions)
        self.n_actions = len(self.action_space)
        self.position = list(init_pos)
        self.reward = 0
        self.position_number = 0

    def reset(self):
        """Zero all goal counters, the step count and the reward."""
        # Slice assignment keeps the same list object, so a reference
        # previously obtained from ``position`` or ``step`` sees the reset.
        self.position[:] = [0] * len(self.position)
        self.position_number = 0
        self.reward = 0

    def is_completed(self, ind):
        """Return True if goal ``ind``'s counter reached its threshold."""
        return self.position[ind] >= Goal_completion_criteria_and_rewards[ind][0]

    def step(self, action_id):
        """Advance one goal counter and report the resulting state.

        Args:
            action_id: index of the goal counter to increment.

        Returns:
            A tuple ``(position, reward, done, completions)``:
            ``position`` is the live counters list (not a copy),
            ``reward`` is the accumulated ``self.reward``,
            ``done`` is True once ``Number_of_steps`` steps were taken, and
            ``completions`` is the number of completed goals (always 0
            while ``done`` is False).

        Raises:
            IndexError: if ``action_id`` is outside the counters list.
        """
        self.position[action_id] += 1
        self.position_number += 1

        completions = 0
        done = self.position_number >= Number_of_steps
        if done:
            # Rewards are only paid out when the step budget is exhausted.
            # They add onto self.reward, which only reset() clears, so call
            # reset() before starting another episode.
            for goal in range(Number_of_goals):
                if self.is_completed(goal):
                    self.reward += Goal_completion_criteria_and_rewards[goal][1]
                    completions += 1

        return self.position, self.reward, done, completions
Share this: