# This is the Python source code of planning_env.py for the post "Reinforcement Learning Example for Planning Tasks Using Q-Learning and Dyna-Q".
"""
Reinforcement learning example.
This script is the environment part of this example. The RL is in RL_brain.py.
"""
import numpy as np
# Seed NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(1)

# Action labels. Maze.step() receives an index into this list and uses the
# same index to pick which goal's progress counter to advance.
ACTIONS = ["ML", "RL"]

# Length of an episode: Maze.step() reports done once this many steps were taken.
Number_of_steps = 5

# How many goals (progress counters) the environment tracks.
Number_of_goals = 2

# Initial progress counter per goal; used as the default start position of Maze.
Curr_goals_position = [0] * Number_of_goals

# Action/goal matrix. Not referenced by the code visible in this file.
ACTION_Goal_Matrix = np.array([[0, 0], [1, 1]])

# One row per goal: [progress needed to complete the goal, reward for completing it].
Goal_completion_criteria_and_rewards = [[2, 3], [3, 10]]
class Maze(object):
    """Episodic environment that spends a fixed step budget on several goals.

    Each action index selects one goal and advances that goal's progress
    counter by one. After ``Number_of_steps`` steps the episode is done and
    the rewards of all goals whose counter reached its completion threshold
    (see ``Goal_completion_criteria_and_rewards``) are added to the reward.

    Attributes:
        action_space: The sequence of action labels passed to the constructor.
        n_actions: Number of entries in ``action_space``.
        position: List with one progress counter per goal.
        position_number: Number of steps taken since the last reset.
        reward: Reward accumulated since the last reset.
    """

    def __init__(self, actions=ACTIONS, init_pos=Curr_goals_position):
        """Create the environment.

        Args:
            actions: Sequence of action labels.
            init_pos: Initial progress counter per goal, or ``None`` to start
                every goal at zero. The sequence is copied, so neither the
                caller's list nor the module-level default is ever mutated
                by this environment.
        """
        super(Maze, self).__init__()
        self.action_space = actions
        self.n_actions = len(self.action_space)
        if init_pos is None:
            self.position = [0] * Number_of_goals
        else:
            # Copy: storing the argument itself would make every instance
            # built with the default share (and mutate) one module-level list.
            self.position = list(init_pos)
        self.reward = 0
        self.position_number = 0

    def reset(self):
        """Start a new episode: zero all goal counters, the step count and the reward.

        The position list is cleared in place, so references previously
        returned by ``step`` keep pointing at the live state.
        """
        for goal in range(Number_of_goals):
            self.position[goal] = 0
        self.position_number = 0
        self.reward = 0

    def is_completed(self, ind):
        """Return True if goal ``ind`` has reached its completion threshold."""
        return self.position[ind] >= Goal_completion_criteria_and_rewards[ind][0]

    def step(self, action_id):
        """Advance the goal selected by ``action_id`` by one step.

        Args:
            action_id: Index of the goal counter to increment.

        Returns:
            A tuple ``(position, reward, done, completions)``:
            ``position`` is the environment's own position list (not a copy),
            ``reward`` is the reward accumulated since the last reset,
            ``done`` is True once ``Number_of_steps`` steps have been taken,
            and ``completions`` is the number of goals found completed on
            this call (always 0 while the episode is not done).

        Call ``reset`` before starting a new episode; stepping again after
        ``done`` scores the completed goals a second time.
        """
        self.position[action_id] += 1
        self.position_number += 1
        done = self.position_number >= Number_of_steps

        completions = 0
        # NOTE(review): the original indentation was lost; goals are assumed
        # to be scored only once the step budget is exhausted - confirm.
        if done:
            for goal in range(Number_of_goals):
                if self.is_completed(goal):
                    self.reward += Goal_completion_criteria_and_rewards[goal][1]
                    completions += 1
        return self.position, self.reward, done, completions