This is the Python source code of planning_envDQN.py for the post "Reinforcement Learning Python DQN Application for Resource Allocation".
"""
Reinforcement learning example.

planning_envDQN.py

This script is the environment part of this example.
The RL is in RL_brainDQN.py.
"""
import numpy as np

# Seed NumPy's global RNG at import time so runs are repeatable.
np.random.seed(1)

# Action labels; the environment exposes len(ACTIONS) discrete actions.
ACTIONS = ['ML', 'RL']
# An episode is flagged done once this many steps have been taken.
Number_of_steps = 5
# Number of goals tracked in the position vector.
Number_of_goals = 2
# Default starting progress for each goal (copied, never mutated, by Maze).
Curr_goals_position = [0, 0]
# Not referenced in this module; presumably consumed elsewhere -- TODO confirm.
ACTION_Goal_Matrix = np.array([
    [0, 0],
    [1, 1],
])
# One row per goal: [progress needed for completion, reward when completed].
Goal_completion_criteria_and_rewards = [
    [2, 3],
    [3, 10],
]


class Maze(object):
    """Episodic environment in which each action adds progress to one goal.

    The state is a list holding one progress counter per goal. Every call to
    ``step`` increments the counter selected by the action. When the step
    budget (``Number_of_steps``) is exhausted, the reward of every completed
    goal is added to the running reward.
    """

    def __init__(self, actions=ACTIONS, init_pos=Curr_goals_position):
        """Create the environment.

        Args:
            actions: Sequence of action labels; its length becomes
                ``n_actions``.
            init_pos: Initial per-goal progress. ``None`` means all zeros.
                The sequence is copied, so the caller's list (including the
                module-level default) is never mutated by this instance.
        """
        super(Maze, self).__init__()
        self.action_space = actions
        self.n_actions = len(self.action_space)
        # Hard-coded; presumably the observation length -- TODO confirm.
        self.n_features = 2
        if init_pos is None:
            self.position = [0] * Number_of_goals
        else:
            # Copy: storing the argument itself would make every instance
            # share (and overwrite) the same list.
            self.position = list(init_pos)
        self.reward = 0
        self.position_number = 0

    def reset(self):
        """Zero all goal progress, the step counter and the reward.

        Returns:
            A copy of the freshly reset position list.
        """
        for goal in range(Number_of_goals):
            self.position[goal] = 0
        self.position_number = 0
        self.reward = 0
        return list(self.position)

    def is_completed(self, ind):
        """Return True if goal ``ind`` has reached its completion threshold."""
        return self.position[ind] >= Goal_completion_criteria_and_rewards[ind][0]

    def step(self, action_id):
        """Advance the goal selected by ``action_id`` by one unit.

        Args:
            action_id: Index into the position list of the goal to advance.

        Returns:
            A tuple ``(position, reward, done)`` where ``position`` is a copy
            of the per-goal progress, ``reward`` is the running reward and
            ``done`` is True once ``Number_of_steps`` steps have been taken.

        Raises:
            IndexError: If ``action_id`` is not a valid goal index.
        """
        # Reject negative indices explicitly; plain list indexing would
        # silently wrap them around to a different goal.
        if not 0 <= action_id < len(self.position):
            raise IndexError("action_id out of range: %r" % (action_id,))

        self.position[action_id] += 1
        self.position_number += 1

        done = self.position_number >= Number_of_steps
        # NOTE(review): the original file's indentation was lost; scoring is
        # assumed to happen only on the final step of the episode -- confirm.
        if done:
            for goal in range(Number_of_goals):
                if self.is_completed(goal):
                    self.reward += Goal_completion_criteria_and_rewards[goal][1]

        # Return a snapshot so earlier observations held by the caller are
        # not altered by later steps.
        return list(self.position), self.reward, done