# run_planning_RL_DQN.py — DQN reinforcement-learning application for resource allocation (training driver).
""" Simplest model-based RL, DQN Rewards 3, 10 are specified in env script in the following The first column is the goal to achieve - for example number of time units to complete. Goal_completion_criteria_and_rewards = [ [2,3], [3,10] ] This script is the main part which controls the update method of this example. The RL is in RL_brainDQN.py. agent = "RANDOM_AGENT" or "" actions 0 , 1 goals ML project, RL project state positions number of hours(steps) for each goal [4,8] example initial [0,0] for each episode """ from planning_envDQN import Maze from RL_brainDQN import DeepQNetwork output_data=[] indexes=[] def update(): counter=0 sum=0 step=0 for episode in range(2000): env.reset() print ("episode=" + str(episode)) s_position=[0,0] while True: a = RL.choose_action((s_position)) print ("Action :") print (a) s_next_position, r, done = env.step(a) RL.store_transition(s_position, a, r, s_next_position) step=step+1 if (step > 200) and (step % 5 == 0): RL.learn() s_position = s_next_position.copy() if done: sum=sum+r if episode % 50 == 0: output_data.append (sum / 50) sum=0 indexes.append ( episode) counter=counter+1 break print('episodes over') if __name__ == "__main__": env = Maze() RL = DeepQNetwork(env.n_actions, env.n_features, learning_rate=0.0045, reward_decay=0.9, e_greedy=0.9, replace_target_iter=400, memory_size=2000, ) update() import matplotlib.pyplot as plt plt.figure(1) plt.plot(indexes, output_data, label='RL') plt.ylabel('Reward') plt.xlabel('training steps') plt.show() RL.plot_cost() """