Linear Regression.

eu90h · eu90h · commit cc4b13a82b28 · 2023-08-17T23:55:28.000-04:00
diff --git a/Linear Regression.ipynb b/Linear Regression.ipynb
@@ -0,0 +1,177 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook illustrates some features of the JAX library in the context of a simple linear regression problem. In real life, we could fit this model much more simply by using the the least squares estimator\n",
+    "$$\n",
+    "\\hat{\\beta}=(X^T X)^{-1}X^T y,\n",
+    "$$\n",
+    "but here we will optimize the mean-square error loss function via gradient descent."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "seed is 1692330859\n"
+     ]
+    }
+   ],
+   "source": [
+    "import jax\n",
+    "import jax.numpy as jnp\n",
+    "import jax.random as random\n",
+    "from collections import namedtuple\n",
+    "import time\n",
+    "SEED = int(time.time())\n",
+    "print(f\"seed is {SEED}\")\n",
+    "key = random.key(SEED)\n",
+    "ModelParameters = namedtuple('ModelParameters', 'w b')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@jax.jit\n",
+    "def predict(params: ModelParameters, x: jnp.array) -> jnp.array:\n",
+    "    return params.w.dot(x) + params.b\n",
+    "vpredict = jax.vmap(predict, in_axes=[None, 0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "JAX random numbers are a bit weird -- we have to push around some state in the `key` variable."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "xs = random.normal(key, shape=(200,1))\n",
+    "key, _ = random.split(key)\n",
+    "Wtrue = random.normal(key, shape=(1,))\n",
+    "key, _ = random.split(key)\n",
+    "btrue = random.normal(key, shape=(1,))\n",
+    "true_params = ModelParameters(Wtrue, btrue)\n",
+    "true_ys = vpredict(true_params, xs)\n",
+    "\n",
+    "key, _ = random.split(key)\n",
+    "W = random.normal(key, shape=(1,))\n",
+    "key, _ = random.split(key)\n",
+    "b = random.normal(key, shape=(1,))\n",
+    "params = ModelParameters(W, b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we define our loss function, the mean of the square of the errors."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@jax.jit\n",
+    "def mse(parameters: ModelParameters, xs: jnp.array, ys: jnp.array) -> jnp.array:\n",
+    "    y_hats = vpredict(parameters, xs)\n",
+    "    return jax.numpy.mean(jnp.square(y_hats - ys))\n",
+    "grad_mse = jax.grad(mse)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Below the model is fitted."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ModelParameters(w=Array([0.2682777], dtype=float32), b=Array([0.1782908], dtype=float32))\n",
+      "ModelParameters(w=Array([1.8501179], dtype=float32), b=Array([0.6013752], dtype=float32))\n",
+      "ModelParameters(w=Array([2.0439496], dtype=float32), b=Array([0.6323448], dtype=float32))\n",
+      "ModelParameters(w=Array([2.0680373], dtype=float32), b=Array([0.63335055], dtype=float32))\n",
+      "ModelParameters(w=Array([2.0710773], dtype=float32), b=Array([0.6330958], dtype=float32))\n",
+      "ModelParameters(w=Array([2.0714667], dtype=float32), b=Array([0.63301265], dtype=float32))\n",
+      "ModelParameters(w=Array([2.0715175], dtype=float32), b=Array([0.6329955], dtype=float32))\n",
+      "ModelParameters(w=Array([2.0715194], dtype=float32), b=Array([0.6329933], dtype=float32))\n",
+      "ModelParameters(w=Array([2.0715194], dtype=float32), b=Array([0.6329933], dtype=float32))\n",
+      "ModelParameters(w=Array([2.0715194], dtype=float32), b=Array([0.6329933], dtype=float32))\n"
+     ]
+    }
+   ],
+   "source": [
+    "lr = 1e-2\n",
+    "for i in range(1000):\n",
+    "    batch_grads = grad_mse(params, xs, true_ys)\n",
+    "    params = ModelParameters(params.w - lr * batch_grads.w, params.b - lr * batch_grads.b)\n",
+    "    if i % 100 == 0:\n",
+    "        print(params)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, let's compare the true parameters to the learned ones."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert jnp.isclose(true_params.w, params.w)\n",
+    "assert jnp.isclose(true_params.b, params.b)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}