This is the Python source code for the post "Reinforcement Learning Python DQN Application for Resource Allocation".
"""Reinforcement learning example: the environment part.

This script defines only the environment; the learning agent (the RL part)
is defined elsewhere.

The environment models a fixed budget of ``Number_of_steps`` steps that is
spent on ``Number_of_goals`` goals.  Each action advances exactly one goal by
one unit, and a goal counts as completed once its progress reaches the
threshold configured in ``Goal_completion_criteria_and_rewards``.
"""
import numpy as np

np.random.seed(1)

# Labels of the available actions.  ``Maze.step`` uses the action index
# directly as the index of the goal to advance.
ACTIONS = ['ML', 'RL']

# Number of steps in one episode.
Number_of_steps = 5

# Number of goals tracked by the environment.
Number_of_goals = 2

# Default starting progress of each goal.  ``Maze`` copies this list and
# never writes to it.
Curr_goals_position = [0, 0]

# NOTE(review): not referenced anywhere in this script; presumably maps
# actions to goals for code outside this file -- confirm before removing.
ACTION_Goal_Matrix = np.array([
    [0, 0],
    [1, 1],
])

# One ``[completion_threshold, reward]`` pair per goal.
Goal_completion_criteria_and_rewards = [
    [2, 3],
    [3, 10],
]


class Maze(object):
    """Environment that tracks per-goal progress over a fixed-length episode.

    Attributes:
        action_space: List of action labels.
        n_actions: Number of available actions.
        n_features: Length of the observation returned by ``step``.
        position: Current progress of each goal (the observation).
        position_number: Number of steps taken in the current episode.
        reward: Reward accumulated so far.
    """

    def __init__(self, actions=ACTIONS, init_pos=Curr_goals_position):
        """Create the environment.

        Args:
            actions: Sequence of action labels.
            init_pos: Initial progress of each goal.  ``None`` means every
                goal starts at zero.  The sequence is copied, so neither the
                module-level default nor a caller's list is ever mutated.
        """
        super(Maze, self).__init__()
        self.action_space = list(actions)
        self.n_actions = len(self.action_space)
        if init_pos is None:
            # The original indexed into ``None`` here and raised TypeError.
            self.position = [0] * Number_of_goals
        else:
            # Copy so that separate instances do not share (and corrupt) the
            # same list object.
            self.position = list(init_pos)
        # The observation is the position vector, so its length is the
        # feature count (2 with the default configuration).
        self.n_features = len(self.position)
        self.reward = 0
        self.position_number = 0

    def reset(self):
        """Start a new episode: zero all progress, the step count and reward."""
        # Reset in place so references to ``self.position`` held by callers
        # keep pointing at the live state.
        for goal in range(Number_of_goals):
            self.position[goal] = 0
        self.position_number = 0
        self.reward = 0

    def is_completed(self, ind):
        """Return True if goal ``ind`` has reached its completion threshold."""
        return self.position[ind] >= Goal_completion_criteria_and_rewards[ind][0]

    def step(self, action_id):
        """Advance one goal by one unit and report the outcome.

        Args:
            action_id: Index of the goal to advance.

        Returns:
            A ``(position, reward, done)`` tuple.  ``position`` is a snapshot
            (copy) of the per-goal progress, ``reward`` is the accumulated
            reward and ``done`` is True once ``Number_of_steps`` steps have
            been taken.

        Raises:
            IndexError: If ``action_id`` does not identify a goal.
        """
        # Reject negative indices explicitly; Python would otherwise wrap
        # them around and silently advance the wrong goal.
        if not 0 <= action_id < len(self.position):
            raise IndexError(
                "action_id {!r} is out of range".format(action_id))

        self.position[action_id] += 1
        self.position_number += 1

        done = self.position_number >= Number_of_steps
        if done:
            # NOTE(review): the source's indentation was lost; the reward is
            # assumed to be granted only when the episode ends -- confirm.
            for goal in range(Number_of_goals):
                if self.is_completed(goal):
                    self.reward += Goal_completion_criteria_and_rewards[goal][1]

        # Hand back a copy so an observation kept by the caller is not
        # overwritten by the next step.
        return list(self.position), self.reward, done