From 7f7304ad823465098077a59b1ca16b91ca1ee2db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralf=20D=2E=20M=C3=BCller?= Date: Tue, 1 Oct 2019 10:02:05 +0200 Subject: [PATCH] added some useful output when I tried to understand this example, it really helped me to see some output. It showed me what epoch, episode and reward mean and that the system is actually playing. --- ...uilding an Agent to Play Atari Games.ipynb | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/08. Atari Games with DQN/8.8 Building an Agent to Play Atari Games.ipynb b/08. Atari Games with DQN/8.8 Building an Agent to Play Atari Games.ipynb index f8f5a95..e770b7f 100644 --- a/08. Atari Games with DQN/8.8 Building an Agent to Play Atari Games.ipynb +++ b/08. Atari Games with DQN/8.8 Building an Agent to Play Atari Games.ipynb @@ -30,7 +30,11 @@ "from tensorflow.contrib.layers import flatten, conv2d, fully_connected\n", "from collections import deque, Counter\n", "import random\n", - "from datetime import datetime" + "from datetime import datetime\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "from IPython import display\n", + "%matplotlib inline" ] }, { @@ -385,6 +389,9 @@ "with tf.Session() as sess:\n", " init.run()\n", " \n", + " maxReward = 0\n", + " maxEpoch = 0\n", + " \n", " # for each episode\n", " for i in range(num_episodes):\n", " done = False\n", @@ -454,7 +461,18 @@ " global_step += 1\n", " episodic_reward += reward\n", " \n", - " print('Epoch', epoch, 'Reward', episodic_reward,)\n", + " # check if this game was more successful than the preceding ones\n", + " # and output the end screen if so.\n", + " if ((epoch > maxEpoch) or (episodic_reward > maxReward)):\n", + " img = plt.imshow(env.render(mode='rgb_array'))\n", + " img.set_data(env.render(mode='rgb_array'))\n", + " display.display(plt.gcf())\n", + " #display.clear_output(wait=True)\n", + " if (epoch > maxEpoch):\n", + " maxEpoch = epoch\n", + " if (episodic_reward > 
maxReward):\n", + " maxReward = episodic_reward\n", + " print('Episode', i, 'maxEpoch', maxEpoch, 'maxReward', maxReward, 'Epoch', epoch, 'Reward', episodic_reward,)\n", " " ] }