This is the Python source code of run_planning_RL_DQN.py, accompanying the post "Reinforcement Learning Python DQN Application for Resource Allocation".
"""
Simplest model-based RL, DQN
Rewards 3, 10 are specified in env script in the following
The first column is the goal to achieve - for example number of time units to complete.
Goal_completion_criteria_and_rewards = [
[2,3],
[3,10]
]
This script is the main part which controls the update method of this example.
The RL is in RL_brainDQN.py.
agent = "RANDOM_AGENT" or ""
actions 0 , 1
goals ML project, RL project
state positions
number of hours(steps) for each goal [4,8] example
initial [0,0] for each episode
"""
from planning_envDQN import Maze
from RL_brainDQN import DeepQNetwork
import matplotlib.pyplot as plt

output_data = []   # average reward per 50-episode window
indexes = []       # episode index recorded at the end of each window
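# Illustrative copy of the reward table quoted in the docstring above. The
# authoritative table lives in planning_envDQN.py; this constant is
# documentation only and is not referenced by the code below.
GOAL_TABLE_EXAMPLE = [
    [2, 3],    # goal 0 (ML project): complete after 2 time units, reward 3
    [3, 10],   # goal 1 (RL project): complete after 3 time units, reward 10
]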
def update():
    reward_sum = 0   # reward accumulated over the current 50-episode window
    step = 0         # total environment steps across all episodes
    for episode in range(2000):
        env.reset()
        print("episode=" + str(episode))
        s_position = [0, 0]
        while True:
            a = RL.choose_action(s_position)
            print("Action:", a)
            s_next_position, r, done = env.step(a)
            RL.store_transition(s_position, a, r, s_next_position)
            step += 1
            # Learn once 200 transitions have been stored, then every 5 steps.
            if (step > 200) and (step % 5 == 0):
                RL.learn()
            s_position = s_next_position.copy()
            if done:
                reward_sum += r
                # Record the average reward once every 50 episodes.
                if (episode + 1) % 50 == 0:
                    output_data.append(reward_sum / 50)
                    indexes.append(episode)
                    reward_sum = 0
                break
    print('episodes over')
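# Illustration only (not part of the original project): the Q-learning target
# that DeepQNetwork.learn() is assumed to regress toward; the actual update is
# implemented in RL_brainDQN.py. With reward_decay (gamma) = 0.9, each stored
# transition (s, a, r, s') contributes the target r + gamma * max_a' Q(s', a').
def bellman_target_example(r, q_next_row, gamma=0.9):
    """Bellman target for one transition; q_next_row holds Q_target(s', .)."""
    return r + gamma * max(q_next_row)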
if __name__ == "__main__":
    env = Maze()
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.0045,
                      reward_decay=0.9,        # discount factor gamma
                      e_greedy=0.9,            # probability of the greedy action
                      replace_target_iter=400, # target-network update interval
                      memory_size=2000,
                      )
    update()

    # Plot the average reward per 50-episode window.
    plt.figure(1)
    plt.plot(indexes, output_data, label='RL')
    plt.ylabel('Reward (average per 50 episodes)')
    plt.xlabel('Episode')
    plt.legend()
    plt.show()
    RL.plot_cost()
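# Usage (assuming planning_envDQN.py and RL_brainDQN.py sit next to this file):
#     python run_planning_RL_DQN.py
# Two plots appear: the average reward per 50-episode window, then the DQN
# training cost curve produced by RL.plot_cost().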
"""