# Reinforcement Learning Dyna-Q Run Planning
#
# This is the Python source code of run_planning_RL.py for the post
# "Reinforcement Learning Example for Planning Tasks Using Q Learning and Dyna-Q".

"""
Simplest model-based RL: Dyna-Q.
The rewards (3 and 10) are specified in the environment script as shown below.
In each pair, the first value is the goal to achieve (for example, the number
of units to complete) and the second value is the reward for achieving it:
Goal_completion_criteria_and_rewards = [
                        [2,3],
                        [3,10]
                        ]              

This script is the main part of the example; it controls the update loop.
The RL agent is implemented in RL_brain.py.
agent: "RANDOM_AGENT" or "" (option passed to QLearningTable)
actions: ML, RL
goals: ML project, RL project

State positions:
the number of hours (steps) for each goal, for example [4,8];
the initial state is [0,0] for each episode.

"""

from planning_env import Maze
from RL_brain import QLearningTable, EnvModel


# Results collected by update() and plotted by the script entry point:
# output_data[i] is the average terminal-step reward over one reporting
# window, indexes[i] is the number of episodes completed at that point.
output_data = []
indexes = []


def update(n_episodes=2000, report_every=50, planning_steps=10,
           environment=None, learner=None, model=None):
    """Run the Dyna-Q training loop and record windowed average rewards.

    Every real environment step is followed by one direct Q-learning update,
    one write to the environment model, and ``planning_steps`` additional
    updates on transitions sampled from that model.

    After each full window of ``report_every`` episodes, the average of the
    rewards received on the terminal step of those episodes is appended to
    the module-level ``output_data`` list and the number of completed
    episodes is appended to ``indexes``.

    Args:
        n_episodes: Number of episodes to run.
        report_every: Number of episodes per averaging window.
        planning_steps: Model-sampled updates performed after each real step.
        environment: Object providing ``reset()`` and ``step(action)``;
            defaults to the module-level ``env``.
        learner: Object providing ``choose_action(state)`` and
            ``learn(s, a, r, s_next, done)``; defaults to the module-level
            ``RL``.
        model: Object providing ``store_transition``, ``sample_s_a``,
            ``get_r_s_`` and ``get_env``; defaults to the module-level
            ``env_model``.

    Raises:
        ValueError: If ``report_every`` is smaller than 1.
    """
    if report_every < 1:
        raise ValueError("report_every must be a positive integer")

    # The module-level names are only looked up when no explicit collaborator
    # is given, so the function also works when imported from another module.
    environment = env if environment is None else environment
    learner = RL if learner is None else learner
    model = env_model if model is None else model

    reward_total = 0

    for episode in range(n_episodes):
        environment.reset()
        print("episode=" + str(episode))

        # Every episode starts from the initial state [0, 0].
        s_position = [0, 0]
        done = False

        while not done:
            state_key = str(s_position)  # states are passed around as strings

            a = learner.choose_action(state_key)
            s_next_position, r, done, _comp_results = environment.step(a)

            # Direct RL update from the real transition, then remember the
            # transition in the model.
            learner.learn(state_key, a, r, str(s_next_position), done)
            model.store_transition(state_key, a, r, s_next_position)

            # Planning: learn again from transitions sampled from the model.
            # NOTE(review): `done` here belongs to the real step above, not to
            # the sampled transition; the model does not appear to store a
            # terminal flag, so this is kept as is - confirm against
            # RL_brain.py.
            for _ in range(planning_steps):
                ms, ma = model.sample_s_a()  # ms is a str
                mr, ms_ = model.get_r_s_(ms, ma)
                learner.learn(ms, ma, mr, str(ms_), done)

            s_position = s_next_position.copy()

        # Only the reward of the terminal step counts towards the statistic.
        reward_total += r

        # Report once a full window has been completed, so every averaged
        # point covers exactly `report_every` episodes.
        if (episode + 1) % report_every == 0:
            output_data.append(reward_total / report_every)
            indexes.append(episode + 1)
            reward_total = 0
            model.get_env()

    print('episodes over')
   

if __name__ == "__main__":
    # update() reads env, RL and env_model as module-level names, so they
    # have to be bound here before it is called.
    env = Maze()
    action_ids = range(env.n_actions)

    # Alternative agent configuration:
    # RL = QLearningTable(actions=list(range(env.n_actions)), agent = "RANDOM_AGENT")
    RL = QLearningTable(actions=list(action_ids))
    # The model gets its own list of the same action ids.
    env_model = EnvModel(actions=list(action_ids))

    update()

    # Called once more after training has finished.
    # NOTE(review): presumably dumps the learned model - confirm in RL_brain.py.
    env_model.get_env()

    # Imported here so matplotlib is only needed once training has finished.
    from matplotlib import pyplot as plt

    # Plot the averaged rewards collected by update() against their indexes.
    plt.plot(indexes, output_data, label='RL')
    plt.show()