SolClover · October 16, 2022 07:31
diff --git a/Art057_Python_012.py b/Art057_Python_012.py
 def _show_frame():
     """Render the environment and show the frame as the current notebook output."""
     # Assumes env.render() returns an image array that imshow accepts
     # (e.g. an env created with render_mode="rgb_array") -- TODO confirm.
     plt.imshow(env.render())  # render current state and pass to pyplot
     plt.axis('off')
     display.display(plt.gcf())  # get current figure and display
     display.clear_output(wait=True)  # clear output before showing the next frame


 # Reset environment to initial state
 state, info = env.reset()

 # Cycle through 50 steps, rendering and displaying the environment state each time
 for _ in range(50):

     # Render and display current state of the environment
     _show_frame()

     # Use greedy policy to evaluate
     action = eval_greedy(Qtable, state)

     # Pass action into step function. The five values are unpacked by name so
     # the truncation flag is not discarded and the loop variable is not rebound.
     state, reward, terminated, truncated, info = env.step(action)

     # The episode is over when it terminated, i.e. when the agent falls into a
     # Hole (H) or reaches the Goal (G), or when it was truncated (e.g. by a
     # step limit). Either way the environment must be reset before stepping again.
     done = terminated or truncated

     if done:
         # Render and display final state of the environment
         _show_frame()
         state, info = env.reset()

 env.close()
	def _show_frame():
		"""Render the environment and show the frame as the current notebook output."""
		# Assumes env.render() returns an image array that imshow accepts
		# (e.g. an env created with render_mode="rgb_array") -- TODO confirm.
		plt.imshow(env.render())  # render current state and pass to pyplot
		plt.axis('off')
		display.display(plt.gcf())  # get current figure and display
		display.clear_output(wait=True)  # clear output before showing the next frame


	# Reset environment to initial state
	state, info = env.reset()

	# Cycle through 50 steps, rendering and displaying the environment state each time
	for _ in range(50):

		# Render and display current state of the environment
		_show_frame()

		# Use greedy policy to evaluate
		action = eval_greedy(Qtable, state)

		# Pass action into step function. The five values are unpacked by name so
		# the truncation flag is not discarded and the loop variable is not rebound.
		state, reward, terminated, truncated, info = env.step(action)

		# The episode is over when it terminated, i.e. when the agent falls into a
		# Hole (H) or reaches the Goal (G), or when it was truncated (e.g. by a
		# step limit). Either way the environment must be reset before stepping again.
		done = terminated or truncated

		if done:
			# Render and display final state of the environment
			_show_frame()
			state, info = env.reset()

	# Release the environment once all 50 steps have been shown
	env.close()