From 7f7304ad823465098077a59b1ca16b91ca1ee2db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ralf=20D=2E=20M=C3=BCller?= <ralf.d.mueller@gmail.com>
Date: Tue, 1 Oct 2019 10:02:05 +0200
Subject: [PATCH] added some useful output

When I tried to understand this example, it really helped me to see some output.
It showed me what epoch, episode, and reward mean and that the system is actually playing.
---
 ...uilding an Agent to Play Atari Games.ipynb | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/08. Atari Games with DQN/8.8 Building an Agent to Play Atari Games.ipynb b/08. Atari Games with DQN/8.8 Building an Agent to Play Atari Games.ipynb
index f8f5a95..e770b7f 100644
--- a/08. Atari Games with DQN/8.8 Building an Agent to Play Atari Games.ipynb	
+++ b/08. Atari Games with DQN/8.8 Building an Agent to Play Atari Games.ipynb	
@@ -30,7 +30,11 @@
     "from tensorflow.contrib.layers import flatten, conv2d, fully_connected\n",
     "from collections import deque, Counter\n",
     "import random\n",
-    "from datetime import datetime"
+    "from datetime import datetime\n",
+    "import matplotlib\n",
+    "import matplotlib.pyplot as plt\n",
+    "from IPython import display\n",
+    "%matplotlib inline"
    ]
   },
   {
@@ -385,6 +389,9 @@
     "with tf.Session() as sess:\n",
     "    init.run()\n",
     "    \n",
+    "    maxReward = 0\n",
+    "    maxEpoch = 0\n",
+    "    \n",
     "    # for each episode\n",
     "    for i in range(num_episodes):\n",
     "        done = False\n",
@@ -454,7 +461,18 @@
     "            global_step += 1\n",
     "            episodic_reward += reward\n",
     "        \n",
-    "        print('Epoch', epoch, 'Reward', episodic_reward,)\n",
+    "        # check if this game was more successful than the preceding ones\n",
+    "        # and output the end screen if so.\n",
+    "        if ((epoch > maxEpoch) or (episodic_reward > maxReward)):\n",
+    "                img = plt.imshow(env.render(mode='rgb_array'))\n",
+    "                img.set_data(env.render(mode='rgb_array'))\n",
+    "                display.display(plt.gcf())\n",
+    "                #display.clear_output(wait=True)\n",
+    "        if (epoch > maxEpoch):\n",
+    "                maxEpoch = epoch\n",
+    "        if (episodic_reward > maxReward):\n",
+    "                maxReward = episodic_reward\n",
+    "        print('Episode', i, 'maxEpoch', maxEpoch, 'maxReward', maxReward, 'Epoch', epoch, 'Reward', episodic_reward,)\n",
     "    "
    ]
   }