This is the Python source code of run_planning_RL.py for the post "Reinforcement Learning Example for Planning Tasks Using Q-Learning and Dyna-Q".
""" Simplest model-based RL, Dyna-Q. Rewards 3, 10 are specified in env script in the following The first goal is the goal to achieve - for example number of units to complete Goal_completion_criteria_and_rewards = [ [2,3], [3,10] ] This script is the main part which controls the update method of this example. The RL is in RL_brain.py. agent = "RANDOM_AGENT" or "" actions ML, RL goals ML project, RL project state positions number of hours(steps) for each goal [4,8] example initial [0,0] for each episode """ from planning_env import Maze from RL_brain import QLearningTable, EnvModel output_data=[] indexes=[] def update(): counter=0 sum=0 for episode in range(2000): env.reset() print ("episode=" + str(episode)) s_position=[0,0] while True: a = RL.choose_action(str(s_position)) s_next_position, r, done, comp_results = env.step(a) RL.learn(str(s_position), a, r, str(s_next_position), done) env_model.store_transition(str(s_position), a, r, s_next_position) for n in range(10): # learn 10 more times using the env_model ms, ma = env_model.sample_s_a() # ms in here is a str mr, ms_ = env_model.get_r_s_(ms, ma) RL.learn(ms, ma, mr, str(ms_), done) s_position = s_next_position.copy() if done: sum=sum+r if episode % 50 == 0: output_data.append (sum / 50) sum=0 indexes.append ( episode) counter=counter+1 env_model.get_env() break print('episodes over') if __name__ == "__main__": env = Maze() #RL = QLearningTable(actions=list(range(env.n_actions)), agent = "RANDOM_AGENT") RL = QLearningTable(actions=list(range(env.n_actions))) env_model = EnvModel(actions=list(range(env.n_actions))) update() env_model.get_env() import matplotlib.pyplot as plt plt.plot(indexes, output_data, label='RL') plt.show()