This is the Python source code of run_planning_RL.py for the post "Reinforcement Learning Example for Planning Tasks Using Q Learning and Dyna-Q".
"""
Simplest model-based RL, Dyna-Q.
Rewards 3, 10 are specified in env script in the following
The first goal is the goal to achieve - for example number of units to complete
Goal_completion_criteria_and_rewards = [
[2,3],
[3,10]
]
This script is the main part which controls the update method of this example.
The RL is in RL_brain.py.
agent = "RANDOM_AGENT" or ""
actions ML, RL
goals ML project, RL project
state positions
number of hours(steps) for each goal [4,8] example
initial [0,0] for each episode
"""
from planning_env import Maze
from RL_brain import QLearningTable, EnvModel
import matplotlib.pyplot as plt

output_data = []
indexes = []

def update():
    counter = 0
    reward_sum = 0
    for episode in range(2000):
        env.reset()
        print("episode=" + str(episode))
        s_position = [0, 0]  # zero hours spent on each goal at episode start
        while True:
            # Act in the real environment and learn from the real transition.
            a = RL.choose_action(str(s_position))
            s_next_position, r, done, comp_results = env.step(a)
            RL.learn(str(s_position), a, r, str(s_next_position), done)
            # Dyna-Q planning: memorize the transition, then learn 10 more
            # times from transitions replayed by the environment model.
            env_model.store_transition(str(s_position), a, r, s_next_position)
            for n in range(10):
                ms, ma = env_model.sample_s_a()  # ms here is a str
                mr, ms_ = env_model.get_r_s_(ms, ma)
                # Note: this reuses the real step's done flag for the
                # replayed transition, which is only accurate when the
                # sampled transition happens to be the terminal one.
                RL.learn(ms, ma, mr, str(ms_), done)
            s_position = s_next_position.copy()
            if done:
                reward_sum = reward_sum + r
                if (episode + 1) % 50 == 0:
                    # Record the average final reward over the last 50 episodes.
                    output_data.append(reward_sum / 50)
                    reward_sum = 0
                    indexes.append(episode)
                counter = counter + 1
                env_model.get_env()
                break
    print('episodes over')
if __name__ == "__main__":
env = Maze()
#RL = QLearningTable(actions=list(range(env.n_actions)), agent = "RANDOM_AGENT")
RL = QLearningTable(actions=list(range(env.n_actions)))
env_model = EnvModel(actions=list(range(env.n_actions)))
update()
env_model.get_env()
import matplotlib.pyplot as plt
plt.plot(indexes, output_data, label='RL')
plt.show()
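
RL_brain.py itself is not listed in this post. For readers who want to run run_planning_RL.py standalone, below is a minimal sketch of the two classes it imports, assuming the standard tabular Dyna-Q recipe: a Q-table updated with the usual one-step Q-learning rule, plus a one-step model that memorizes every observed transition and replays it during planning. The class names ending in "Sketch", the dict-based storage, and all hyperparameter values are illustrative assumptions, not the post's actual implementation.

import random

class QLearningTableSketch:
    """Tabular Q-learning agent (illustrative sketch only)."""
    def __init__(self, actions, lr=0.01, gamma=0.9, epsilon=0.9):
        self.actions = actions
        self.lr = lr            # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # probability of acting greedily
        self.q_table = {}       # {state_str: [q-value per action]}

    def _ensure_state(self, s):
        if s not in self.q_table:
            self.q_table[s] = [0.0] * len(self.actions)

    def choose_action(self, s):
        self._ensure_state(s)
        if random.random() < self.epsilon:
            q = self.q_table[s]
            return q.index(max(q))          # exploit: greedy action
        return random.choice(self.actions)  # explore: random action

    def learn(self, s, a, r, s_, done):
        self._ensure_state(s)
        self._ensure_state(s_)
        # One-step Q-learning target: r + gamma * max_a' Q(s', a'),
        # reduced to just r at the end of an episode.
        target = r if done else r + self.gamma * max(self.q_table[s_])
        self.q_table[s][a] += self.lr * (target - self.q_table[s][a])

class EnvModelSketch:
    """One-step tabular environment model for Dyna-Q (illustrative sketch only)."""
    def __init__(self, actions):
        self.actions = actions
        self.database = {}  # {state_str: {action: (reward, next_state)}}

    def store_transition(self, s, a, r, s_):
        # Remember the latest outcome observed for the (s, a) pair.
        self.database.setdefault(s, {})[a] = (r, s_)

    def sample_s_a(self):
        # Pick a previously visited state, then one of its tried actions.
        s = random.choice(list(self.database.keys()))
        a = random.choice(list(self.database[s].keys()))
        return s, a

    def get_r_s_(self, s, a):
        # Replay the stored outcome for the sampled (s, a) pair.
        return self.database[s][a]

    def get_env(self):
        # Dump the learned model for inspection.
        print(self.database)

Renaming these classes to QLearningTable and EnvModel and saving them as RL_brain.py should be enough to exercise the main loop above, although the published RL_brain.py may choose actions, break ties, or report the learned model differently.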