Reinforcement Learning DQN Run Planning

This is the Python source code of run_planning_RL_DQN.py for the post Reinforcement Learning Python DQN Application for Resource Allocation.

"""
A simple model-free RL example using DQN.

The rewards (3 and 10) are defined in the environment script as shown below.
The first column is the completion criterion for each goal, i.e. the number
of time units needed to complete it; the second column is the reward
(e.g. completing the first goal after 2 time units yields a reward of 3):

Goal_completion_criteria_and_rewards = [
                        [2, 3],
                        [3, 10]
                        ]

This script is the main loop; it drives training via the update() function
below. The DQN itself is implemented in RL_brainDQN.py.

agent  : "RANDOM_AGENT" or ""
actions: 0, 1
goals  : ML project, RL project

State: the number of hours (steps) spent on each goal, e.g. [4, 8].
The state starts at [0, 0] in each episode.
"""

from planning_envDQN import Maze
from RL_brainDQN import DeepQNetwork
import matplotlib.pyplot as plt
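
# planning_envDQN.py is not included in this post. For orientation only,
# here is a minimal sketch of the interface this script assumes the Maze
# environment exposes (n_actions, n_features, reset, step). The class name
# MazeSketch and all of its internals are hypothetical, inferred from the
# docstring above; the real environment may differ.
class MazeSketch:
    # completion criterion (time units) and reward per goal, per the docstring
    goal_table = [[2, 3], [3, 10]]

    def __init__(self):
        self.n_actions = 2   # action 0: work on the ML project, 1: the RL project
        self.n_features = 2  # state = time units spent on each goal
        self.state = [0, 0]

    def reset(self):
        # every episode starts with no time spent on either goal
        self.state = [0, 0]
        return self.state

    def step(self, action):
        # spend one time unit on the chosen goal
        self.state[action] += 1
        criterion, reward = self.goal_table[action]
        if self.state[action] >= criterion:
            # goal completed: pay its reward and end the episode
            return self.state, reward, True
        return self.state, 0, False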


output_data = []   # average reward for each 50-episode window
indexes = []       # episode index recorded at the end of each window


def update():

    counter = 0
    reward_sum = 0  # reward accumulated over the current 50-episode window
    step = 0        # total environment steps across all episodes

    for episode in range(2000):

        env.reset()
        print("episode=" + str(episode))

        s_position = [0, 0]  # initial state: no time units spent on either goal

        while True:

            # epsilon-greedy action selection (greedy with probability e_greedy)
            a = RL.choose_action(s_position)
            print("Action :")
            print(a)

            # take one time step in the environment
            s_next_position, r, done = env.step(a)

            # store the transition in the replay memory
            RL.store_transition(s_position, a, r, s_next_position)

            step = step + 1
            # start learning once 200 transitions have been collected,
            # then do one learning step every 5 environment steps
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            s_position = s_next_position.copy()

            if done:
                reward_sum = reward_sum + r
                # every 50 episodes, record the window's average reward
                if episode % 50 == 0:
                    output_data.append(reward_sum / 50)
                    reward_sum = 0
                    indexes.append(episode)
                    counter = counter + 1

                break

   
    print('episodes over')
   

if __name__ == "__main__":
    env = Maze()
  
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.0045,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=400,
                      memory_size=2000,
                      )
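
    # What RL.learn() is assumed to do (RL_brainDQN.py is not shown in this
    # post): a standard DQN update, fitting the online network toward the
    # target  y = r + reward_decay * max_a' Q_target(s', a'),  where Q_target
    # is a copy of the online network refreshed every replace_target_iter=400
    # learning steps, and minibatches are sampled from a replay memory that
    # holds the most recent 2000 transitions.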

    update()
  
    plt.figure(1)
    plt.plot(indexes, output_data, label='RL')
    plt.legend()
    plt.ylabel('Reward (50-episode average)')
    plt.xlabel('episode')
    plt.show()
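
    # plot_cost() is assumed to plot the training loss history recorded
    # inside RL_brainDQN.py; see that file for the implementation.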
    
    RL.plot_cost()

"""