update vislog to be a bit more fancy, has GPT-2/3 perf for hellaswag

karpathy · karpathy · commit 0bc8753bc036 · 2024-06-19T01:51:09.000Z
diff --git a/dev/vislog.ipynb b/dev/vislog.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -61,63 +61,86 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sz = \"350M\"\n",
+    "import numpy as np\n",
+    "\n",
+    "sz = \"124M\"\n",
     "loss_baseline = {\n",
     "    \"124M\": 3.424958,\n",
     "    \"350M\": 3.083089,\n",
     "    \"774M\": 3.000580,\n",
     "    \"1558M\": 2.831273,\n",
     "}[sz]\n",
-    "hella_baseline = {\n",
+    "hella2_baseline = { # for GPT-2\n",
     "    \"124M\": 0.294463,\n",
     "    \"350M\": 0.375224,\n",
     "    \"774M\": 0.431986,\n",
     "    \"1558M\": 0.488946,\n",
     "}[sz]\n",
-    "\n",
+    "hella3_baseline = { # for GPT-3\n",
+    "    \"124M\": 0.337,\n",
+    "    \"350M\": 0.436,\n",
+    "    \"774M\": 0.510,\n",
+    "    \"1558M\": 0.547,\n",
+    "}[sz]\n",
     "# assumes each model run is stored in this way\n",
-    "logfile = f\"../log{sz}/main.log\"\n",
+    "logfile = f\"../log_gpt2_{sz}/main.log\"\n",
     "streams = parse_logfile(logfile)\n",
     "\n",
+    "# optional function that smooths out the loss some\n",
+    "def smooth_moving_average(signal, window_size):\n",
+    "    if signal.ndim != 1:\n",
+    "        raise ValueError(\"smooth_moving_average only accepts 1D arrays.\")\n",
+    "    if signal.size < window_size:\n",
+    "        raise ValueError(\"Input vector needs to be bigger than window size.\")\n",
+    "    if window_size < 3:\n",
+    "        return signal\n",
+    "\n",
+    "    s = np.pad(signal, (window_size//2, window_size-1-window_size//2), mode='edge')\n",
+    "    w = np.ones(window_size) / window_size\n",
+    "    smoothed_signal = np.convolve(s, w, mode='valid')\n",
+    "    return smoothed_signal\n",
+    "\n",
     "plt.figure(figsize=(16, 6))\n",
     "\n",
     "# Panel 1: losses: both train and val\n",
     "plt.subplot(121)\n",
     "xs, ys = streams[\"trl\"] # training loss\n",
+    "ys = np.array(ys)\n",
+    "# smooth out ys using a rolling window\n",
+    "# ys = smooth_moving_average(ys, 21) # optional\n",
     "plt.plot(xs, ys, label=f'llm.c ({sz}) train loss')\n",
     "print(\"Min Train Loss:\", min(ys))\n",
     "xs, ys = streams[\"tel\"] # validation loss\n",
     "plt.plot(xs, ys, label=f'llm.c ({sz}) val loss')\n",
     "# horizontal line at GPT-2 baseline\n",
+    "# we don't have GPT-3 loss on this dataset because the weights were never released\n",
     "if loss_baseline is not None:\n",
     "    plt.axhline(y=loss_baseline, color='r', linestyle='--', label=f\"OpenAI GPT-2 ({sz}) checkpoint val loss\")\n",
     "plt.xlabel(\"steps\")\n",
     "plt.ylabel(\"loss\")\n",
     "plt.yscale('log')\n",
+    "plt.ylim(top=4.0)\n",
     "plt.legend()\n",
     "plt.title(\"Loss\")\n",
     "print(\"Min Validation Loss:\", min(ys))\n",
     "\n",
     "# Panel 2: HellaSwag eval\n",
     "plt.subplot(122)\n",
-    "xs, ys = streams[\"eval\"] # HellaSwag eval\n",
-    "plt.plot(xs, ys, label=f\"llm.c ({sz})\")\n",
-    "# horizontal line at GPT-2 baseline\n",
-    "if hella_baseline:\n",
-    "    plt.axhline(y=hella_baseline, color='r', linestyle='--', label=f\"OpenAI GPT-2 ({sz}) checkpoint\")\n",
-    "plt.xlabel(\"steps\")\n",
-    "plt.ylabel(\"accuracy\")\n",
-    "plt.legend()\n",
-    "plt.title(\"HellaSwag eval\")\n",
-    "print(\"Max Hellaswag eval:\", max(ys))"
+    "if \"eval\" in streams:\n",
+    "    xs, ys = streams[\"eval\"] # HellaSwag eval\n",
+    "    ys = np.array(ys)\n",
+    "    plt.plot(xs, ys, label=f\"llm.c ({sz})\")\n",
+    "    # horizontal line at GPT-2/3 baselines\n",
+    "    if hella2_baseline:\n",
+    "        plt.axhline(y=hella2_baseline, color='r', linestyle='--', label=f\"OpenAI GPT-2 ({sz}) checkpoint\")\n",
+    "    if hella3_baseline:\n",
+    "        plt.axhline(y=hella3_baseline, color='g', linestyle='--', label=f\"OpenAI GPT-3 ({sz}) checkpoint\")\n",
+    "    plt.xlabel(\"steps\")\n",
+    "    plt.ylabel(\"accuracy\")\n",
+    "    plt.legend()\n",
+    "    plt.title(\"HellaSwag eval\")\n",
+    "    print(\"Max Hellaswag eval:\", max(ys))\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {