diff --git a/blogposts/cut_material.ipynb b/blogposts/cut_material.ipynb new file mode 100644 index 00000000..963ec3eb --- /dev/null +++ b/blogposts/cut_material.ipynb @@ -0,0 +1,604 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "92f808ae-02b1-4bd6-90dd-8102fc2723f4", + "metadata": { + "tags": [] + }, + "source": [ + "# CUT MATERIAL" + ] + }, + { + "cell_type": "markdown", + "id": "dac337c7-53e9-4349-8595-cb507487fc23", + "metadata": { + "tags": [] + }, + "source": [ + "### Notebook setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a71509e5-eb93-4250-82e0-7ee7840158c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Figures setup\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "plt.clf() # needed otherwise `sns.set_theme` doesn't work\n", + "sns.set_theme(\n", + " style=\"whitegrid\",\n", + " rc={'figure.figsize': (6.25, 2.0)},\n", + ")\n", + "# High-resolution figures please\n", + "%config InlineBackend.figure_format = 'retina'\n", + "\n", + "def savefig(fig, filename):\n", + " fig.tight_layout()\n", + " fig.savefig(filename, dpi=300, bbox_inches=\"tight\", pad_inches=0)" + ] + }, + { + "cell_type": "markdown", + "id": "7a418434-46f0-4b33-a0a4-64630669f259", + "metadata": {}, + "source": [ + "#### Pandas equivalent" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "42fd08de-c1ad-4712-9114-1ebbadabb441", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "75.0" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "grades = [80, 90, 70, 60]\n", + "gseries = pd.Series(grades)\n", + "gseries.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d0a39b8-0714-43c6-b222-ec7772d5511e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "b4747352-7fe2-49bf-b77c-87deb251c8d8", + "metadata": {}, + "source": [ + "$N \\sim \\mathcal{N}(\\mu,\\sigma)$ has the probability density function:\n", + "\n", + "$$\n", + " f_N(x) = \\tfrac{1}{\\sigma\\sqrt{2\\pi}} e^{ -\\frac{1}{2} \\left( \\frac{x-\\mu}{\\sigma} \\right)^2 },\n", + "$$\n", + "\n", + "where $\\mu$ is the mean and $\\sigma$ is the standard deviation.\n", + "We use the notation $\\mathcal{N}(\\mu, \\sigma)$ to describe the distribution as math,\n", + "and `norm(mu,sigma)` to describe it as a computer model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "450085fb-38a5-46c8-b211-a59112be5df1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "243cc02d-a990-4901-957b-92ab13ce1ba9", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def fN(x, mu=0, sigma=1):\n", + " const = 1 / (sigma*np.sqrt(2*np.pi))\n", + " exp = np.exp( -1/2 * ( (x-mu)/sigma )**2 )\n", + " return const * exp" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e2396cf3-13a3-403e-a69c-4a9c0ff1fa1b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12579440923099774" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fN(3, 2, 3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c092803-18d8-4492-aac5-d56d0f421b8c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "da46076a-26aa-483b-871c-480ca39723de", + "metadata": {}, + "outputs": [], + "source": [ + "def mean(sample):\n", + " total = 0\n", + " for xi in sample:\n", + " total = total + xi\n", + " avg = total / len(sample)\n", + " return avg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc011d84-50da-44e2-b03b-b3f49134955d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9322b6c-fa42-40e1-b108-1fa628b4d3b6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33b2a6b1-8577-46ba-a9cb-fc8e3291a3f1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "3eec87b9-837b-4096-85b7-288780e65109", + "metadata": {}, + "source": [ + "### Problem NN (numerical math considerations)\n", + "\n", + "We'll use the Python library NumPy (module `numpy` imported 
as `np`) \n", + "to help us with the fancy math operations.\n", + "To compute $e^x$ we can call `np.exp(x)`,\n", + "and to compute the factorial of `n` we can call `np.math.factorial(n)`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4872022e-13da-4a3a-bf77-ed919fe4adbe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "image/png": { + "height": 204, + "width": 547 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "def fH(h):\n", + " lam = 20\n", + " return lam**h * np.exp(-lam) / np.math.factorial(h)\n", + "\n", + "# calculation is not stable for h > 14\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "hs = np.arange(0,40)\n", + "fHs = [fH(h) for h in hs]\n", + "plt.stem(fHs)" + ] + }, + { + "cell_type": "markdown", + "id": "3e0ad2c7-8931-467b-9614-f7e41f40960e", + "metadata": {}, + "source": [ + "We can apply the log-trick to the formula for ..." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f523a8f2-d332-4d49-969f-036c16208a62", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "image/png": { + "height": 201, + "width": 547 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "from scipy.special import gammaln\n", + "\n", + "def fHalt(h):\n", + " lam = 20\n", + " return np.exp(h * np.log(lam) - lam - gammaln(h + 1))\n", + "\n", + "fHalts = [fHalt(h) for h in hs]\n", + "plt.stem(fHalts)" + ] + }, + { + "cell_type": "markdown", + "id": "b2e807f3-8a33-4fd8-8115-4211ffa47a2d", + "metadata": {}, + "source": [ + "The log-transform trick and `gammaln` function are really useful for dealing with large factorials and multiplications of small probabilities,\n", + "which occur a lot in statistical calculations.\n", + "The need for numerical stability is one thing you need to keep in mind when\n", + "you implement statistical algorithms in production." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20928f80-2bdf-40cb-a864-b2b719df58fd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7aa0fd9e-1df3-4170-8e27-f8c2dfb47fb5", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import norm\n", + "rvZ = norm(0,1)" + ] + }, + { + "cell_type": "markdown", + "id": "52e55d96-3159-4c73-911b-bcbdd75e77c7", + "metadata": {}, + "source": [ + "The cumulative distribution function (CDF) $F_Z$ is defined as the integral \n", + "of the probability density function $f_Z$ up to some value $z=b$.\n", + "\n", + "$$\n", + " \\textrm{Pr}(\\{Z \\leq b\\}) = F_Z(b) = \\int_{z=-\\infty}^{z=b} f_Z(z)\\; dz.\n", + "$$\n", + "\n", + "The computer model `rvZ` provides the method `.cdf` which allows us to obtain the values of $F_Z$ directly." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "5609fb2f-5772-45d4-8615-ef7e3768d4b9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9772498680518208" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rvZ.cdf(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ab3a570d-64ee-451a-8889-660a7f6e0da4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "image/png": { + "height": 184, + "width": 608 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# FIGURES ONLY\n", + "zs = np.linspace(-4, 4, 1000)\n", + "fZs = rvZ.pdf(zs)\n", + "ax = sns.lineplot(x=zs, y=fZs)\n", + "mask = (zs < 2)\n", + "ax.fill_between(zs[mask], y1=fZs[mask], alpha=0.6, facecolor=\"red\")\n", + "savefig(ax.figure, \"figures/pdf_of_rvZ_highlight_-infty_to_2.png\")" + ] + }, + { + "cell_type": "markdown", + "id": "e5f60d2d-429c-468c-9f5f-97cb652dbd97", + "metadata": {}, + "source": [ + "We're often interested in computing the complement,\n", + "\n", + "$$\n", + " \\textrm{Pr}(\\{Z \\geq b\\}) = 1- F_Z(b) = \\int_{z=b}^{z=\\infty} f_Z(z) \\; dz.\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "73937fba-d67e-464a-8206-b4164990c09a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.02275013194817921" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1 - rvZ.cdf(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "573ca0b9-ce5a-4aaa-bfd2-167f4bfc7244", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "image/png": { + "height": 184, + "width": 608 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# FIGURES ONLY\n", + "zs = np.linspace(-4, 4, 1000)\n", + "fZs = rvZ.pdf(zs)\n", + "ax = sns.lineplot(x=zs, y=fZs)\n", + "mask = (zs > 2)\n", + "ax.fill_between(zs[mask], y1=fZs[mask], alpha=0.6, facecolor=\"red\")\n", + "savefig(ax.figure, \"figures/pdf_of_rvZ_highlight_2_to_infty.png\")" + ] + }, + { + "cell_type": "markdown", + "id": "488f92c0-3a39-4e33-aff3-3caad90d6ba3", + "metadata": {}, + "source": [ + "In statistics,\n", + "we often have to compute the probability in one or both tails of the distribution,\n", + "which corresponds to the probability of observing \"extreme values\":\n", + "\n", + "$\\textrm{Pr}(\\{Z \\geq 2\\}) = \\int_{z=2}^{z=\\infty} f_Z(z) dz$" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9c33ed03-9a98-4fec-8767-de730b91ea72", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.02275013194817598" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from scipy.integrate import quad\n", + "quad(rvZ.pdf, 2, np.inf)[0]" + ] + }, + { + "cell_type": "markdown", + "id": "54ba7416-3937-4510-b8f9-f9b84f53842f", + "metadata": {}, + "source": [ + "The cumulative distribution function (CDF) $F_Z$ is defined as the integral \n", + "of the probability density function $f_Z$ up to some value $z=b$.\n", + "\n", + "$$\n", + " F_Z(b) = \\textrm{Pr}(\\{Z \\leq b\\}) = \\int_{z=-\\infty}^{z=b} f_Z(z)\\; dz.\n", + "$$\n", + "\n", + "The computer model `rvZ` provides the method `.cdf` which allows us to obtain the values of $F_Z$ directly.\n", + "For example, $F_Z(-2) = \\textrm{Pr}(\\{Z \\leq -2\\})$ can be computed as follows." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "92cf77ae-1f0e-42ad-a1d6-b360ca431fcb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.022750131948179195" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rvZ.cdf(-2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0fc48598-a48c-4836-837c-6192de63df52", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "157f99fa-f54e-437c-9d0f-6abe30e23bd4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ec0bada-61f4-4ebb-a808-0a2799daed97", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0be774a-8f3b-4e9a-9323-399e62d6c726", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/blogposts/python_for_stats.ipynb b/blogposts/python_for_stats.ipynb index 97476259..5a562e08 100644 --- a/blogposts/python_for_stats.ipynb +++ b/blogposts/python_for_stats.ipynb @@ -29,12 +29,47 @@ "cell_type": "markdown", "id": "ff950e24-931e-454c-8c45-45eb6823aa7d", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ "### Notebook setup" ] }, + { + "cell_type": "code", + "execution_count": 1, + "id": "db0ba656-cf8d-47fb-a889-52f03722f26d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Figures setup\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "plt.clf() # needed otherwise `sns.set_theme` doesn't work\n", + "sns.set_theme(\n", + " style=\"whitegrid\",\n", + " rc={'figure.figsize': (6.25, 2.0)},\n", + ")\n", + "# High-resolution figures please\n", + "%config InlineBackend.figure_format = 'retina'\n", + "\n", + "def savefig(fig, filename):\n", + " fig.tight_layout()\n", + " fig.savefig(filename, dpi=300, bbox_inches=\"tight\", pad_inches=0)" + ] + }, { "cell_type": "markdown", "id": "3f689330-7def-4935-be79-520d1c0190b6", @@ -48,12 +83,12 @@ "id": "7bf445c3-2d7c-48ac-90be-84417d51f685", "metadata": {}, "source": [ - "### Python as a calculator" + "### Using Python as a calculator" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "6ff440f2-f2c4-4821-8af0-1bec72c2b608", "metadata": {}, "outputs": [ @@ -63,7 +98,7 @@ "5.5" ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -74,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "a5d156a0-ac2f-4271-95e6-1f22f499b535", "metadata": {}, "outputs": [], @@ -84,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "1d87588a-e92e-4225-9ace-dbe533c4f4f6", "metadata": {}, "outputs": [], @@ -94,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "a7eaa89b-cf12-477c-94b6-30641373ea55", "metadata": {}, "outputs": [ @@ -104,7 +139,7 @@ "5.5" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -131,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "305c829d-680e-4652-8399-2f55b40bee57", "metadata": {}, "outputs": [ @@ -141,7 +176,7 @@ "2.75" ] }, - "execution_count": 5, + "execution_count": 6, 
"metadata": {}, "output_type": "execute_result" } @@ -171,12 +206,12 @@ "id": "a63a1efe-8635-45a9-8beb-9c9d05fad193", "metadata": {}, "source": [ - "### Powerful primitives and built-ins" + "### Powerful primitives and builtin functions" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "57b9fab6-d2bd-4ebf-a191-eb83f81d6a0b", "metadata": {}, "outputs": [ @@ -186,7 +221,7 @@ "75.0" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -215,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "e097b267-e125-4f0f-bb06-5e6ef837fb48", "metadata": {}, "outputs": [ @@ -225,7 +260,7 @@ "75.0" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -290,7 +325,7 @@ "id": "f5142273-ad49-459a-8b55-32ffccee1db1", "metadata": {}, "source": [ - "#### Example\n", + "#### Example 1: sample mean\n", "\n", "We want to define a Python function `mean` that computes the mean from a given sample (a list of values).\n", "\n", @@ -302,7 +337,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "ab6e7d58-79f9-4afd-9f85-b814b45eb1bb", "metadata": {}, "outputs": [], @@ -323,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "ab56d567-0758-4b70-9b97-fb2cd1283deb", "metadata": {}, "outputs": [ @@ -333,7 +368,7 @@ "75.0" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -356,7 +391,7 @@ "id": "795538a1-fa68-43c8-b8ce-461d67d57842", "metadata": {}, "source": [ - "#### Math function example (bonus topic)\n", + "#### Example 2: math function (bonus topic)\n", "\n", "In math, \n", "a function is a mapping from input values (usually denoted x) to output values (usually denoted y).\n", @@ -371,7 +406,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": 
"00fd3024-4329-4899-af02-2023a5e722f1", "metadata": {}, "outputs": [], @@ -392,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "88d9656c-913f-4617-999b-d98460ec070d", "metadata": {}, "outputs": [ @@ -402,7 +437,7 @@ "11" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -413,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "2d8c7281-2d21-4f92-8951-4ef76cd99144", "metadata": {}, "outputs": [], @@ -429,6 +464,14 @@ "outputs": [], "source": [] }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c4a84ba-e05d-4714-a525-8a2640147321", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "2483aa87-3da4-4b21-9f1c-be9899ba851f", @@ -447,133 +490,142 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "a70a4bc4-50a7-4b50-8706-b2295bf57eb4", "metadata": {}, "outputs": [], "source": [ - "pricesW = [11.8, 10, 11, 8.6, 8.3, 9.4, 8, 6.8, 8.5]" + "prices = [11.8, 10, 11, 8.6, 8.3, 9.4, 8, 6.8, 8.5]" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "5891ee22-6a6c-416d-a1bb-b283bb6a6519", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAg0AAAGdCAYAAACRlkBKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAATvUlEQVR4nO3dfXTV9X3A8U8IEgKSKD4QwjMI6Kp17brjE1PnfJgy2x5PrZZaUfS03eF00rN51KMeetrjQ62jrbLWdlNqtXVuO2g3TzcKalXUKQ51zKMgGiOKaC2SRNJgSH77w8kaE/CTEHPJ9fU65/7BL/fe3yffhHvf/O7vciuKoigCAOADDCn1AADA4CAaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFKG9vWGnZ2dsXHjxhg1alRUVFT050wAwIekKIpoaWmJ+vr6GDKkd8cO+hwNGzdujAkTJvT15gBACW3YsCHGjx/fq9v0ORpGjRq1Y6c1NTV9vRsAYAA1NzfHhAkTdjyP90afo+G9lyRqampEAwAMMn05tcCJkABAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUoaWeoDf94+Pvxw/fujF2LC5NQ4dVxsLTpwRx804oNRjsROPrH8zFi1fF0+/siXG7zsi5s2aEl86ctKHsq8nXtocf/urdfFE4+aoqx0ec4+aHBfMmhIVFRW9up+nNmyJ65etjccbNscBo6rinCMnxVeOnRpDhvz//ax5pSmuW/ZcPPbi5th/72HxxSMnxV8eN63LdT7KNmxujWv//bm497nXY8SwoXHGJ8bFX588M6qHVZZ6tN3y5tvb4rr/eC5+uWZTVETE7I+PjUv+/ODYd+SwUo9WVrZt74jvrXg+/vmJV6KlrT2Om3FAXHLqwTHtgL1LPdoeoSiKuHllQ/zkkZfi9ea2+NSk0fE3p8yIP5o0utSjRURERVEURV9u2NzcHLW1tdHU1BQ1NTW7Pchtj74UV/7
imS7bKodUxM8uPCKOnLrfbt8//Wv1y2/FWT96NNo7uv76XDH7kLjwT6b2676efa05Pvt3D8e27Z1dtl/0Z9Pj6yfNSN/PC795O06/cWW0vtPRZftXjp0al512SEREvPTm1ph9w0Ox9X3XuWDWlLjyL/6gj99B+Whpa49TvvtgbGxq67L9hIMPjFvO++MSTbX7OjqLmH3DQ/HcppYu2w8dVxP/On+WYOxHX7vjyfi3pzd22bb/3sNi2YJjY7+9q0o01Z5j0a/Wxg33re+ybfheQ+IX82fFzLpR/bKP3Xn+Tr88sW3btmhubu5y6S9FUcQPf/1Ct+0dnUX8+MEX+20/9J+/f/DFbsEQEfGjB1+Mjs4+dehO3bKyoVswRETc8nBDtLV39HCLnv3k4Ze6BUNExE8fbYy3t22PiIhbH32pWzBERPzsscZo+l17L6YuT3c9+Wq3YIiIuO+5N+K5Tf33mDDQ7n329W7BEBHxP682x4PP/6YEE5Wnl3/bGvf898Zu2998+534pydeKcFEe5a29o5Y8vBLPWzvjFtWNgz8QD1IR8M111wTtbW1Oy4TJkzotyFa3+no8YEo4t1/HbLn2dnP5Tct2/r9yXVn+2pp2x5vNG/b7fv5XXtHbNzyu/+7ztYer9PW3hmvvNWa3le5euGNnf99fOGNntduMNjZzz0iYv0uvmd654U3346dHdv2WB/xenNbtPzfP2Deb/0esj7paLjsssuiqalpx2XDhg39NsSIYZUxft/qHr82c0z/HI6hf83Yyc+lrmZ41Fbv1a/72tkhuX1G7BVjavOHM3c2895VQ2PcPu/+/s0c0/PrqtV7VcbE0SPS+ypXM3ZxeHRm3eB9TXpXsx9ct/svv/Ku6QfuHTt7pcdjfcSYXTx+9tdLE7srHQ1VVVVRU1PT5dJfKioq4q9OmN5t+16VFfHV46f1237oP189bloMG9r912f+n06Lyn5+/feCWVNiRA8n2X352KlRNTR/8t28Y6bEqOHdz/2dd8zkGFn17vZzj5ocNT1c5/xjJseo4f0bQ4PRZ/9wXI/xdOqhdXHQgXvGg1pfHD/jwPj4+Npu2z85cZ845iDnVPWX8fuOiDM+Ob7b9rqa4XHmp7pv/6gZvldlfPnY7ueEjRxWGRfMmlKCibrbY06EjIj4xVOvxj881BAvb26Nw8bVxtdOOCiOcBLkHuu/GjfH9+9dH09v2BLj962OC2ZN6fEBoT+seaUpvn/vunii8a2oqxke5x41OeYcMbHX9/Psa83xvRXr4rGGzXHA3lXxpaMmxblHTe5ynbWbWuK7y9fFfzb8Nvbfuyq+eMTEOO/oyb1+p0a52tTUFouWr417n30jqodVxhmfGBfzTzioVwG3J2pqbY/vrlgXv1zzWlRURMw+rD4WnDQ9asRiv9re0Rk//PUL8S+rX4mWtu1x/IwD4usnzYgJjuTt8LPHGuOnjzTG6y1t8alJ+8aCE2fEoeO6R21f7c7z9x4VDQDAh2tA3j0BAHy0iQYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0
AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAAClD+3rDoigiIqK5ubnfhgEAPlzvPW+/9zzeG32OhpaWloiImDBhQl/vAgAokZaWlqitre3VbSqKvqRGRHR2dsbGjRtj1KhRUVFR0Ze7+FA1NzfHhAkTYsOGDVFTU1PqccqatR4Y1nlgWOeBYZ0HRk/rXBRFtLS0RH19fQwZ0ruzFPp8pGHIkCExfvz4vt58wNTU1PiFHCDWemBY54FhnQeGdR4Y71/n3h5heI8TIQGAFNEAAKSUbTRUVVXFwoULo6qqqtSjlD1rPTCs88CwzgPDOg+M/l7nPp8ICQB8tJTtkQYAoH+JBgAgRTQAACmiAQBIKbtomDx5clRUVHS7zJ8/v9SjlZXt27fHFVdcEVOmTInq6uqYOnVqfPOb34zOzs5Sj1Z2WlpaYsGCBTFp0qSorq6Oo48+OlatWlXqsQa9Bx98ME4//fSor6+PioqKuPvuu7t8vSiK+MY3vhH19fVRXV0dxx9/fDzzzDOlGXYQ+6B1Xrp0aZxyyimx//77R0VFRTz11FMlmXOw29U6t7e3xyWXXBKHHXZYjBw5Murr6+Pcc8+NjRs39no/ZRcNq1atitdee23HZfny5RERceaZZ5Z4svLy7W9/O2666aZYvHhxPPvss3HdddfFd77znbjxxhtLPVrZufDCC2P58uVx2223xZo1a+Lkk0+OE088MV599dVSjzaobd26NQ4//PBYvHhxj1+/7rrrYtGiRbF48eJYtWpV1NXVxUknnbTjc3fI+aB13rp1axxzzDFx7bXXDvBk5WVX69za2hqrV6+OK6+8MlavXh1Lly6NdevWxac//ene76gocxdddFExbdq0orOzs9SjlJXZs2cX8+bN67LtjDPOKM4555wSTVSeWltbi8rKyuKee+7psv3www8vLr/88hJNVX4iorjrrrt2/Lmzs7Ooq6srrr322h3b2traitra2uKmm24qwYTl4f3r/PsaGhqKiCiefPLJAZ2pHO1qnd/z+OOPFxFRNDY29uq+y+5Iw+9755134vbbb4958+btkR+qNZjNmjUr7r333li3bl1ERDz99NOxcuXKOO2000o8WXnZvn17dHR0xPDhw7tsr66ujpUrV5ZoqvLX0NAQmzZtipNPPnnHtqqqqjjuuOPikUceKeFk0D+ampqioqIi9tlnn17drs8fWDUY3H333bFly5Y477zzSj1K2bnkkkuiqakpDj744KisrIyOjo646qqr4gtf+EKpRysro0aNiqOOOiq+9a1vxSGHHBJjxoyJO+64Ix577LGYPn16qccrW5s2bYqIiDFjxnTZPmbMmGhsbCzFSNBv2tra4tJLL405c+b0+sPCyvpIw8033xynnnpq1NfXl3qUsnPnnXfG7bffHj//+c9j9erVceutt8b1118ft956a6lHKzu33XZbFEUR48aNi6qqqrjhhhtizpw5UVlZWerRyt7
7j1AWReGoJYNae3t7nH322dHZ2Rk/+MEPen37sj3S0NjYGCtWrIilS5eWepSydPHFF8ell14aZ599dkREHHbYYdHY2BjXXHNNzJ07t8TTlZdp06bFAw88EFu3bo3m5uYYO3ZsnHXWWTFlypRSj1a26urqIuLdIw5jx47dsf2NN97odvQBBov29vb4/Oc/Hw0NDXHffff16SPJy/ZIw5IlS+LAAw+M2bNnl3qUstTa2hpDhnT99amsrPSWyw/RyJEjY+zYsfHWW2/FsmXL4jOf+UypRypbU6ZMibq6uh3vvop49xypBx54II4++ugSTgZ9814wPP/887FixYrYb7/9+nQ/ZXmkobOzM5YsWRJz586NoUPL8lssudNPPz2uuuqqmDhxYnzsYx+LJ598MhYtWhTz5s0r9WhlZ9myZVEURcycOTPWr18fF198ccycOTPOP//8Uo82qL399tuxfv36HX9uaGiIp556KkaPHh0TJ06MBQsWxNVXXx3Tp0+P6dOnx9VXXx0jRoyIOXPmlHDqweeD1nnz5s3x8ssv7/g/A9auXRsR7x7tee+IDx9sV+tcX18fn/vc52L16tVxzz33REdHx47zdkaPHh3Dhg3L76ivb+nYky1btqyIiGLt2rWlHqVsNTc3FxdddFExceLEYvjw4cXUqVOLyy+/vNi2bVupRys7d955ZzF16tRi2LBhRV1dXTF//vxiy5YtpR5r0Lv//vuLiOh2mTt3blEU777tcuHChUVdXV1RVVVVHHvsscWaNWtKO/Qg9EHrvGTJkh6/vnDhwpLOPdjsap3feztrT5f777+/V/vx0dgAQErZntMAAPQv0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAAp/wvFOrzvx0VmyAAAAABJRU5ErkJggg==\n", + "image/png": "\n", "text/plain": [ - "
" + "
" ] }, - "metadata": {}, + "metadata": { + "image/png": { + "height": 201, + "width": 508 + } + }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", - "sns.stripplot(x=pricesW, jitter=0)" + "sns.stripplot(x=prices, jitter=0)" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "02caade4-678c-4681-893b-04e22a865903", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ - "
" + "
" ] }, - "metadata": {}, + "metadata": { + "image/png": { + "height": 201, + "width": 551 + } + }, "output_type": "display_data" } ], "source": [ - "sns.histplot(x=pricesW)" + "sns.histplot(x=prices)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "8455cb4d-6c7b-4341-80a2-bd67126f5383", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAg0AAAGdCAYAAACRlkBKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAP9UlEQVR4nO3dX4iUZfvA8Wt2fZ1Ze1fDItettVdFLEL8nWZRHZRhYVH0xySypDMPlEA0LIzCP1l4UBKdhEhRbyfmgSCiJUUEJZgREZogJpl0ULqbsps6z3sQ7k8r8tplnWd2+nxgYHaWmefydpj5es+MUymKoggAgEtoK3sAAGB0EA0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKWOGe8V6vR7Hjh2Lzs7OqFQqIzkTAHCZFEURfX190d3dHW1tQ9s7GHY0HDt2LHp6eoZ7dQCgREePHo3rrrtuSNcZdjR0dnYOHnT8+PHDvRkAoIF6e3ujp6dn8Hl8KIYdDedfkhg/frxoAIBRZjhvLfBGSAAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIwpewDIKooi+vv7G3asgYGBiIioVqtRqVQaclyaR61W8/cOfyAaGDX6+/tj3rx5ZY/BP8SOHTuio6Oj7DGgqXh5AgBIsdPAqPTr/z0WRdtlvPueOxOdX/03IiL6Zi+IaP/X5TsWTaNSPxv/3v9e2WNA0xINjEpF25jGPZG3/0s0/EMUZQ8ATc7LEwBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIwpe4ALFUUR/f39ERFRq9WiUqmUPBEANFYzPxc21U5Df39/zJs3L+bNmze4YADwT9LMz4VNFQ0AQPMSDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ
0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBlTNkDXKgoisHz/f39JU5CM7roPnHBfQVGjMcgmsCF972iyR7r0tEwMDAQAwMDgz/39vaO+DAX3v4DDzww4rdPC6mfjYixZU9Bq6mfHTzrMYhmMDAwEOPGjSt7jEHplyfWrVsXEyZMGDz19PRczrkAgCaT3ml49tln45lnnhn8ube3d8TDoVqtDp7/4IMPolarjejtM7r19/f//7/+2prqlTVaxQX3K49BlOXCx7oLnxebQfqRt1qtXvbhK5XK4PlarRYdHR2X9XiMYhfcV2DEeAyiyVSa7LHOpycAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQMqbsAS5Uq9Vix44dg+cB4J+mmZ8LmyoaKpVKdHR0lD0GAJSmmZ8LvTwBAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAEDKmLIHgOGo1M9GcTkPcO7MX5+npVXqZ8seAZqaaGBU+vf+9xp2rM6v/tuwYwE0My9PAAApdhoYNWq1WuzYsaMhxyqKIgYGBiIiolqtRqVSachxaR61Wq3sEaDpiAZGjUqlEh0dHQ073rhx4xp2LIDRwMsTAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkCIaAIAU0QAApIgGACBFNAAAKaIBAEgRDQBAimgAAFJEAwCQIhoAgBTRAACkiAYAIEU0AAApogEASBENAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQMqY4V6xKIqIiOjt7R2xYQCAy+v88/b55/GhGHY09PX1RURET0/PcG8CAChJX19fTJgwYUjXqRTDSY2IqNfrcezYsejs7IxKpTKcm7isent7o6enJ44ePRrjx48ve5yWZq0bwzo3hnVuDOvcGH+1zkVRRF9fX3R3d0db29DepTDsnYa2tra47r
rrhnv1hhk/frw7ZINY68awzo1hnRvDOjfGH9d5qDsM53kjJACQIhoAgJSWjYZqtRqrV6+OarVa9igtz1o3hnVuDOvcGNa5MUZ6nYf9RkgA4J+lZXcaAICRJRoAgBTRAACkiAYAIKXlouE///lPVCqVP52WLFlS9mgt5ezZs/Hcc8/F1KlTo6OjI6ZNmxYvvvhi1Ov1skdrOX19fbFs2bK4/vrro6OjI+bMmRN79+4te6xR75NPPon58+dHd3d3VCqV2LZt20W/L4oiXnjhheju7o6Ojo6444474ptvviln2FHsUuu8devWuPvuu+Pqq6+OSqUS+/fvL2XO0e7v1vnMmTOxYsWKmDVrVlxxxRXR3d0dTzzxRBw7dmzIx2m5aNi7d2/8+OOPg6ddu3ZFRMTDDz9c8mSt5eWXX44333wzNm3aFN9++21s2LAhXnnllXj99dfLHq3lPP3007Fr1654++234+uvv465c+fGnXfeGT/88EPZo41qp06ditmzZ8emTZv+8vcbNmyIjRs3xqZNm2Lv3r3R1dUVd9111+D37pBzqXU+depU3HLLLbF+/foGT9Za/m6dT58+Hfv27Yvnn38+9u3bF1u3bo2DBw/GfffdN/QDFS1u6dKlxfTp04t6vV72KC3l3nvvLRYvXnzRZQ8++GDx+OOPlzRRazp9+nTR3t5ebN++/aLLZ8+eXaxataqkqVpPRBQffPDB4M/1er3o6uoq1q9fP3hZf39/MWHChOLNN98sYcLW8Md1vtDhw4eLiCi+/PLLhs7Uiv5unc/74osviogojhw5MqTbbrmdhgv99ttv8c4778TixYub8ku1RrNbb701Pvzwwzh48GBERHz11Vfx6aefxj333FPyZK3l7Nmzce7cuajVahdd3tHREZ9++mlJU7W+w4cPx/Hjx2Pu3LmDl1Wr1bj99tvjs88+K3EyGBknT56MSqUSV1555ZCuN+wvrBoNtm3bFidOnIgnn3yy7FFazooVK+LkyZNxww03RHt7e5w7dy7WrFkTjz32WNmjtZTOzs64+eab46WXXoobb7wxJk2aFO+99158/vnnMWPGjLLHa1nHjx+PiIhJkyZddPmkSZPiyJEjZYwEI6a/vz9WrlwZCxcuHPKXhbX0TsNbb70V8+bNi+7u7rJHaTnvv/9+vPPOO/Huu+/Gvn37YsuWLfHqq6/Gli1byh6t5bz99ttRFEVce+21Ua1W47XXXouFCxdGe3t72aO1vD/uUBZFYdeSUe3MmTOxYMGCqNfr8cYbbwz5+i2703DkyJHYvXt3bN26texRWtLy5ctj5cqVsWDBgoiImDVrVhw5ciTWrVsXixYtKnm61jJ9+vT4+OOP49SpU9Hb2xuTJ0+ORx99NKZOnVr2aC2rq6srIn7fcZg8efLg5T/99NOfdh9gtDhz5kw88sgjcfjw4fjoo4+G9ZXkLbvTsHnz5rjmmmvi3nvvLXuUlnT69Oloa7v47tPe3u4jl5fRFVdcEZMnT45ffvkldu7cGffff3/ZI7WsqVOnRldX1+CnryJ+f4/Uxx9/HHPmzClxMhie88Hw3Xffxe7du+Oqq64a1u205E5DvV6PzZs3x6JFi2LMmJb8I5Zu/vz5sWbNmpgyZUrcdNNN8eWXX8bGjRtj8eLFZY/Wcnbu3BlFUcTMmTPj0KFDsXz58pg5c2Y89dRTZY82qv36669x6NChwZ8PHz4c+/fvj4kTJ8aUKVNi2bJlsXbt2pgxY0bMmDEj1q5dG+PGjYuFCxeWOPXoc6l1/vnnn+P7778f/D8DDhw4EBG/7/ac3/Hh0v5unbu7u+Ohhx6Kffv2xfbt2+PcuXOD79uZOHFijB07Nn+g4X6ko5nt3LmziIjiwIEDZY/Ssnp7e4ulS5cWU6ZMKWq1WjFt2rRi1apVxcDAQNmjtZz333+/mDZtWjF27Niiq6urWLJkSXHixImyxxr19uzZU0TEn06LFi0qiuL3j12uXr266OrqKqrVan
HbbbcVX3/9dblDj0KXWufNmzf/5e9Xr15d6tyjzd+t8/mPs/7Vac+ePUM6jq/GBgBSWvY9DQDAyBINAECKaAAAUkQDAJAiGgCAFNEAAKSIBgAgRTQAACmiAQBIEQ0AQIpoAABSRAMAkPI/ifFX7QutfDAAAAAASUVORK5CYII=\n", + "image/png": "\n", "text/plain": [ - "
" + "
" ] }, - "metadata": {}, + "metadata": { + "image/png": { + "height": 201, + "width": 508 + } + }, "output_type": "display_data" } ], "source": [ - "sns.boxplot(x=pricesW)" + "sns.boxplot(x=prices)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "7d6743af-1055-416f-9e1d-517c2ef1e988", "metadata": {}, "outputs": [ - { - "ename": "NameError", - "evalue": "name 'savefig' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[17], line 20\u001b[0m\n\u001b[1;32m 17\u001b[0m sns\u001b[38;5;241m.\u001b[39mboxplot(x\u001b[38;5;241m=\u001b[39mpricesW, ax\u001b[38;5;241m=\u001b[39max3)\n\u001b[1;32m 18\u001b[0m ax3\u001b[38;5;241m.\u001b[39mset_xticks(\u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m7\u001b[39m,\u001b[38;5;241m13\u001b[39m))\n\u001b[0;32m---> 20\u001b[0m \u001b[43msavefig\u001b[49m(fig, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfigures/epricesW_strip_hist_box_plots.png\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'savefig' is not defined" - ] - }, { "data": { - "image/png": "\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABh8AAAFxCAYAAABjkrnkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAB7CAAAewgFu0HU+AABp/ElEQVR4nO3dd3gUVdvH8d+mAaGFjiBNIESqKE0BEVCqjzSRjig+FgxFsGBBsCAWBKUqqI8gIogCooB0UYpKFVBAmlKkJgRIAtmUff/Im3GXtN1kdje7+X6ui4vZ3TMz95zdZO/MPXOOxWaz2QQAAAAAAAAAAGCSAG8HAAAAAAAAAAAA/AvFBwAAAAAAAAAAYCqKDwAAAAAAAAAAwFQUHwAAAAAAAAAAgKkoPgAAAAAAAAAAAFNRfAAAAAAAAAAAAKai+AAAAAAAAAAAAExF8QEAAAAAAAAAAJiK4gMAAAAAAAAAADAVxQcAAAAAAAAAAGAqig8AAAAAAAAAAMBUFB8AAAAAAAAAAICpKD4AAAAAAAAAAABTUXwAAAAAAAAAAACmovgAAAAAAAAAAABMRfEBAAAAAAAAAACYiuIDAAAAAAAAAAAwFcUHAAAAAAAAAABgKooPgIckJiZ6OwTY4f0AAMBz8vP3bn4+dgAA4F3kIfA2ig/wiNdff121atXSmDFj3LL92NhYNW/eXLVr19bu3bvdso+ciomJ0Ysvvqjly5fnelsDBgxQrVq1VKtWLZ08edKE6PKGtGNq06aN2/d17do1TZw4UR999JHb9wUAyNsyy0/M+r49deqUGjRooFtvvVUnTpzIbbimxnby5EljOwMGDDAltoycOXNGQ4YM0c6dO922D2d56pjTmJkDAgD8j7vOk7jj/Ii/novwNM59ID+i+AC3++mnnzRv3jwVLVpUTz31lFv2UaRIEY0cOVLJycl65plnFBcX55b9uGrr1q3q2LGjvvrqK6WkpHg7nHzv4MGD6ty5s2bPnq2kpCRvhwMA8CJP5CcVK1bU4MGDFRcXp2eeeUbJyclu2U9etXz5cnXs2FHr1q3zdigeRw4IAMiKO/OQvHp+BJ7DuQ/kJRQf4Fbx8fF66aWXZLPZ9Nhjj6lkyZJu21e3bt0UERGh48ePa/LkyW7bjyu2b9+u6Ohob4eB//f7779zlQYAwKP5ySOPPKIyZcpo165dmjt3rtv2kxf99NNPio+P93YYXkEOCADIjCfykLx4fgSew7kP5CUUH+BW06ZN05kzZ1SuXDkNHDjQrfsKCAjQqFGjJEnz58/X/v373bo/b/jss8908OBBHTx4UDfeeKO3wwEAwCd5Mj8JDQ3Vk08+KUmaOnWqzp4969b9OevGG280corPPvvM2+EAAJBveCIPyQ/nRwD4BooPcJuTJ08aV/g9+uijKlCggNv3eeedd6p+/fpKTk7Wm2++6fb9AQAA3+KN/KRHjx4qX7684uLi9P7777t9fwAAIG/yZB7C+REAeQHFB7jNjBkzlJiYqNDQUHXt2tVj++3bt68k6eeff9avv/7qsf0CAIC8zxv5SUhIiHr27ClJ+uabb3T8+HGP7BcAAOQtns5DOD8CwNuCvB0A/NO5c+e0bNkySVLHjh1VpEiRbNfZunWr1q5dqx07dujcuXO6fPmyChQooLCwMNWtW1f33HOPOnbsqMDAwCy306FDB40fP15XrlzRrFmz1KRJk1wdy6ZNm7Rs2TLt2rVLZ8+elcViUcmSJRUREaFWrVqpW7du6a5WGD16tJYsWeLw3PPPP6/nn39ekjRhwgR1795dktSmTRudOnVKLVq00Mcff6xVq1Zp+vTp+uuvv1SiRAk1aNBAQ4YMUUREhAYMGGAkDOvWrXMYemnx4sXG9ufOnaumTZtq7dq1mj9/vg4cOKDLly+rdOnSuvXWW9WnTx81btw4V/0ydepUTZs2zYilYsWKWrJkib766isdOXJEcXFxKl++vJo2baoBAwYoIiIiV/uTpOjoaC1
cuFA//fSTjh49qtjYWBUrVkxVqlRRy5Yt1bt37wzHy7SPNc20adOM5yIjIzV06NBcxwcAyNtykp9IqWMzz5kzRytXrtSJEycUFBSkSpUqqU2bNurbt69TYzX36NFD06dPV1JSkj7++GO98soruTqWNElJSfrqq6+0YsUKHTp0SLGxsSpTpozq1q2rnj17qmXLlhmud/LkSbVt21aS1KRJkwyHXkpJSdHq1au1YsUK7d27VxcuXFBQUJBKliyp+vXrq3Xr1urcuXO63Mw+X0ljP6xEWp7iLPu86uDBg7Jarfriiy+MQo7ValXFihXVvHlzDRw4UJUrV3Z625n5559/tGDBAm3ZskXHjx9XfHy8wsLCVKNGDd11113q2bOnChcunGWsaTLLAQEA+UtO8pC///5bX375pTZv3qy///5biYmJCgsLU+3atXXvvfdm+D1sz+zzI2nMyI2uzzOioqIUFBSksmXLqkmTJurWrZsaNmyY4bpPPPGE1q9fL0kqW7asVq5cmWl/vvjii/rqq68kSTfddJOWLFmiggULOn2snPsAcofiA9xi0aJFSkxMlCTde++9WbY9f/68RowYoe3bt6d7LTExUbGxsTp58qS+//57ffLJJ5o1a5ZKly6d6fYKFSqktm3baunSpdq0aZNOnDihSpUquXwM165d06hRo7R27dp0r/3zzz/6559/tH79ek2fPl3Tp09X/fr1Xd7H9VatWqXhw4fLZrNJks6cOaMzZ84oMjLSpe3YbDaNGTNGX375pcPzp0+f1vLly7V8+XL1799fL774ogICcn8DlNVq1RNPPKENGzY4PH/ixAmdOHFCixcv1lNPPaVHH300x/tYuHCh3nzzzXQTV0ZFRSkqKko7d+7URx99pBdeeEH3339/jvcDAPBfruQnaU6cOKFBgwbpxIkTDs///vvv+v333zV37ly9/vrrateuXZbbueGGG3Tbbbdp27ZtWrZsmZ555hmnix+ZOXnypB5//HEdOnTI4flTp07p1KlTWrVqlbp3767XX38924s3rhcdHa0hQ4Zo165dDs9brVbFx8fr5MmTWrFihWbMmKFZs2aZcsLfGTExMXr00Uf122+/OTx/9OhRHT16VAsWLND48ePVpUuXHG0/JSVFM2fO1IwZM5SUlOTw2vnz53X+/Hlt3bpVs2bN0htvvKG77rorp4cCAMhnXMlDUlJSNHnyZH388cdKTk52eO38+fPauHGjNm7cqLlz52rGjBkqW7Zshtsx6/yIPTNyo8OHD2vUqFE6cOCAw/MJCQk6duyYjh07poULF6pjx44aP358uoL/a6+9pp07dyomJkbnzp3Tu+++q7Fjx6bbz4YNG4zCQ3BwsCZOnOhS4eF6nPsAXEfxAW6xdOlSSamTLDZq1CjTdvHx8erbt68x/ECxYsXUqlUrVa1aVSEhITp37py2bNmiI0eOSEr9MnvxxRf14YcfZrn/Fi1aaOnSpbLZbPrmm29cPnkvpX6ZpRUeChcurNatW6tatWqyWCw6efKkVq1apbi4OJ07d06DBw/W6tWrVaJECUlSp06dVLNmTW3evFmbN282nqtbt64kqV69eun2d+nSJb388stG4SFN9erVFR4e7lLs06dPN644rFu3rlq0aKGAgAD9/PPP2rlzpyRp3rx5io+P14QJE1zadkbGjh1r7K9p06Zq3LixrFarNm7cqIMHDyolJUXvvvuurFZrjt6LWbNm6d133zUe33jjjWrdurVKly6t8+fPa8OGDTp16pTi4uL04osvKjo62uHLvnnz5goNDdW+ffu0YsUK47nmzZtLUqZXUwAA/Iuz+Ym94cOH69KlSypUqJDuvvtu3XTTTYqOjtbq1at19uxZXbp0SU899ZRmzJihVq1aZbmtFi1aaNu2bYqPj9eaNWvUrVu3XB3PkCFDFBcXp5CQELVp00Y1a9aU1WrVpk2b9Pvvv0tKvTOyatWqeuyxx1za9siRI43CQ4kSJdSmTRt
VqlRJiYmJ+uuvv7R69WolJibq2LFjevjhh7VixQqFhIRIkvr06aO77rpLK1as0L59+yRJvXv3NgoUuSlUDB06VL/99psCAgLUqlUr1a1bV3FxcVq7dq2OHz+uxMREPffcc0pOTs7RHQbjxo3TwoULjcc1a9ZUixYtFBYWplOnTmn9+vW6cOGCoqKi9MQTT+jtt9/Wf/7zH6N9TnJAAED+4Eoe8uKLL2rx4sXG4/DwcLVo0UKFCxfWsWPHtGrVKiUmJmrv3r166KGH9PXXX2d6Ut2M8yP2cpsbHT58WH379tWlS5ckpRZI7rrrLiOP2bVrl3755RdJ0sqVK3X8+HF9/vnnKlSokLGN0qVLa+zYsXrqqackSQsWLFCXLl10yy23GG2io6M1ZswY4/GwYcNUp06dXB075z6AHLABJvvzzz9t4eHhtvDwcNvgwYOzbPv+++8bbbt162aLjo7OsN3s2bONduHh4baTJ09mud0LFy4Ybbt27eryMfzzzz+2iIgIW3h4uK158+a248ePp2sTFRVl69y5s7GfqVOnpmszZcoU4/Wvv/46w321bt3a4dgeffRR2+HDh21xcXG2bdu22VatWmW07d+/v9HuxIkTDtv5+uuvHbYTERFh+/zzz9Ptb+nSpbbatWsb7TZt2uRq96Q7tvDwcFv9+vVta9ascWiTkpJi+/DDD402tWvXth06dCjdttJeb926dbrXtm7d6rCfqVOn2hITEx3aWK1W27vvvutw7D///HO6bdn30ZQpU3J03AAA3+RKfmL/fRseHm679957033vxsfH20aMGGG0ad68ue3KlStZbve3334z2j/55JM5Oo7rY+vVq5ftzJkz6dpNnjzZaNOoUSOb1Wp1eP3EiRPG6/3793d4bceOHcZrHTt2tF28eDHd9v/++29b8+bNjXZLlixJ1+a5554zXs/oe9lZ9tsJDw+3NWnSxLZjxw6HNlar1fbaa685HHNUVJTTx2yzOeYJtWvXtn3xxRe2lJQUhzZxcXG20aNHG+0aNGhgO3LkSLptOZMDAgDyD1fykJUrVzp8Hy1cuDBdm7/++st25513Znk+Ik1uz4/YbOblRlar1dauXTuHPOb06dPp9vfLL7/YmjZtarR7/vnnM4xr2LBhDjHZnysYOnSow/d+cnJyjo6dcx9A7jDhNEyXVqGWlO1Yd2mVf0l64403jDsHrvfII4+oRo0axuM//vgjy+2WKlVKZcqUkSTt379fly9fzi5sB3v37lVKSoqk1LEYM7otsWTJkg5V9LQrDHMjPDxc06ZNU/Xq1Y2rIbIbxiEzI0aMMCaXstelSxc9/fTTxuNJkyblOF5748eP19133+3wnMVi0aOPPqoBAwZISh2XesqUKS5t97333jOWH3roIUVGRiooyPGmreDgYI0cOVL9+/eXlHqLqv16AAC4kp/YK168uD766COHeZak1Kv0Jk6caFxBd/78+XRj/V8vPDzcGO7w119/TXe3o6vKlCmj2bNnq1y5culeGzZsmBHz5cuX0w3NlBX7IY0eeOABhYWFpWtTuXJljRw5UlLq9/3evXtdjD5nAgICNH36dN16660OzwcHB+ull15S69atJaUe8+zZs53ebmJiosMYyaNHj1bv3r1lsVgc2oWGhmrChAnGfq5evarp06fn9HAAAPmEK3mI/ffXqFGj9MADD6RrU6VKFYcr5O3v2rtebs+PXC83udHixYv1119/SZIqVqyojz76SOXLl0+3jyZNmujDDz80/vZfsmSJjh49mq7d2LFjVapUKUnSn3/+qU8++USStGzZMq1atUpS6ggbb731lilDTkuc+wBcRfEBptuzZ4+xnNVwQVarVU8++aSGDBmiwYMHZ/sFXKtWLWM5Li4u2zjS2ttsNoeYnGE/LvLevXvTjfmbpnHjxvrmm2+0c+dOzZw506V9ZKRnz54KDg7O9XbKli2rwYMHZ/p6//79jcmJ9u3bp9OnT+dqf/Xq1ctyzMo
hQ4YYx/XTTz8pISHBqe2ePHnSGPIhNDQ029sWn3rqKYWGhkqSdu7cmWFyAgDIn5zNT6734IMPZnhyX0rNF4YMGWI8TptEMjMFCxY0hhy6dOmS8cd3TvXq1UtFixbN8LWAgAA1a9bMePzPP/84vV37PGj37t2ZtuvYsaOWL1+u3377zeGCDHdq165dlkNVDB8+3FjOaN6uzOzcuVOnTp2SlDrEQdof9Zl5/vnnjcLE999/71RuCgDIv5zNQ86ePWsMWViiRIksv48aNWqkO+64Q7fffrvatm2r2NjYTNvm5vzI9XKTG3333XfGcmRkZJbzXzVo0ECdOnWSlHqSPaOLPEqWLKlXX33VeDxjxgzt2bNH48ePN54bN26cKlSokM1ROYdzH4DrKD7AdMeOHTOWb7rppkzbhYSEqEePHho+fLieffbZLLd54cIFXbx40XicNklTVuz37eof97fccovxhbFr1y717t1bS5Ys0YULFxzaBQQEKCIiIt3kRzllPz5hbrRv3z5dhdxecHCwWrZsaTzetGlTrvbXuXPnLF8vWbKkGjRoICl1no8dO3Y4td2ff/7ZWG7RokW2E3MWKVJELVq0MB5nNIk5ACB/cjY/uV779u2zfP3OO+80vnP/+OOPdJMDXq969erGcm6LD9mNF126dGljOasTEtdr0qSJsbxy5Uo9/PDDWrlyZborJQsVKqQaNWqoQIECTm87t7LLOW6++WbdcMMNkqTjx4/r77//dmq79jnHPffck+6Oh+tVqVJFtWvXlpR6dWNWRRoAAJzNQ9LmC5JSx+pPm08pM//73//06aefaty4cVn+vZyb8yPXy2lulJCQYJxgt1gsTo3y0LFjR2N527ZtGba5++67dd9990lKvSOxf//+iomJkSTdd9992eYOruDcB+A6ig8wnf1V9MWLF3dp3aioKG3fvl1LlizR+++/r2HDhumee+5R8+bNtWXLFpe2Zb9vV672k1JvS/zvf/9rPN67d69Gjx6tFi1aqGvXrpo4caJ+/fXXTO+IyKnrb1vMKWcmUbIfxirtSr+8tr+TJ08ay84OkWHf7sSJE06tAwDwfznJT4KDg7MtVISEhBjDMyYlJTl8d2UkN/nJ9dLuYsyM/YUIacNJOiMiIkJdunQxHm/evFkjRoxQs2bN1KtXL02bNk179uzJ9bBROeFqzuFsH5NzAADcydk8xL6d/fdZbpmVf+QmNzp79qxxIemNN96Y7Ql2KfWigjRZ5VgvvfSSypYtK0nG3QYVK1bU2LFjs92HKzj3Abgu80ujgRy6cuWKsZzZUAD2YmNjNWfOHC1ZsiTLX5iBgYFKTk52Og77fbtytV+aYcOGqVChQpo+fbquXbsmKfUWxf3792v//v2aPXu2wsLC1KlTJ/33v/815Ta+YsWK5XobUvYnIyTH5CMqKipX+0sbY9Hs/aVdrSApw/GmM2Lf7tKlS06tAwDwf67mJ1Lq97Iz4wPbf8dlN45ybvMTe2bdeZmR119/XaVKldLcuXONiy2Sk5O1e/du7d69W1OnTlW5cuV033336eGHH3Yq9zCDqznH9XetZoacAwDgTs7mIdHR0cayWecHrt9nbvKP3ORG9t+1zl4IYv9da79+RvsbNWqUnnvuOeO5wYMHO1XgcAXnPgDXcecDTGe1Wo3ltDHoMnPkyBHdd999mjJlikPhISgoSNWqVVP79u31zDPPaMmSJcZtdM6y37d9TM5KmzBo48aNeuWVV3TnnXeqYMGCDm1iYmI0f/58dezYURs3bnR5H9fLaqgkVzgzb4R9ISe380w4E3dO9peTKypdubITAJB/uJKfuMr++yq74RFym594SkhIiJ577jmtX79ezz//vJo0aZLu+/vs2bOaPXu2OnbsaIxP7W55NefIbpgmAED+5mwe4swQ0znh6fwjo9woJ9+19t/pWX3XpqSkaNGiRQ7PzZ49O9cXelwvr+YhQF7GnQ8wXcGCBY0x/axWa6Z/hFutVg0
dOtS4Da1SpUoaNGiQGjdurJtuuindL+mrV6+6FIf9F2puxiIOCwtT79691bt3b1mtVu3evVtbt27VDz/8oD/++EOSdO3aNY0cOVIbNmww9eqEnLK/qiIz9pVx+zGh89L+7K8YsJ/zIyv27fLCewEAyBuczU/sOfsHq/13XHbfPfYTD3pyroScKleunAYNGqRBgwYZYxdv2bJFGzZsMMavjomJ0fDhw7V69WqHyard4cqVKypRokSWbXKbc2R1ZaU9+5zD2btpAAD5k7N5iH0e4czf2c4y6/xIbnIj+2PLyXdtVjnW3LlzjXkPLBaLbDabTp8+rfHjx2vChAlO7csZnPsAXMedDzCd/RAAcXFxmbZbu3atjhw5IkkqX768vvrqK/Xv31+1atXKsDpsf/uhM1Vh+32bdatdSEiImjRpouHDh2vJkiX6/PPPjT82Y2NjtX79elP2k1tp/ZqVP//801iuXLlyntxflSpVjOWDBw86tc7+/fuN5bRxJgEAcDY/sZeQkKAzZ85k2SY+Pl7Hjx+XlHpVYXbzN9lPSG32UADuFhoaqpYtW+q5557T999/rylTphg528mTJ7Vz5063x+BMznHo0CFj2dlcwD7nOHDggFPrkHMAAJzlbB5i/31y9OjRbLe7dOlSvfTSS5o1a1aW7c06P5Kb3KhChQpG3nDq1CmnTuTbfydn9l179OhRTZ48WZIUEBCg6dOnGyfzFy9erB9++CHb/TiLcx+A6yg+wHT2f3SfPXs203a7d+82ljt06JDluHbXrl3Tnj17jMfO3F5m/4Xo6kTOs2bNUr9+/dSsWbMs/5Bu1KiR7r333gz3KXnvFvwff/wxy9cTEhK0efNmSam3DbZs2dKt+zt79qwxHEOpUqXUoEEDp7bbqFEjY3nTpk3ZXmVx5coVbd261Xh8yy23OLzOkAgAkH85m59cb8uWLVm+vm7dOiMvadCgQba34+cmP/GUCRMmqFevXmrcuHGWfdW+fXvdfvvtxmNP5EE//fRTlq//9ttvOn/+vKTUSSHLlSvn1Hbtc47Vq1dnm2seO3bMODkQEBCg+vXrO7xOzgEAsOdsHnLrrbcay1u2bMl23stly5Zp0aJFevfdd3Xu3LlM25mZf+Q0NypQoIDq1asnKfWC0tWrV2e7r++//95Yvv7veyl1iKPnn3/emKezf//+atu2rZ599lmjzUsvvWTanAic+wBcR/EBpqtWrZqxnNUE0va3/WV3W9mkSZOMLxNJxsSHWTl58mSGMTnj/Pnz2r59uy5evKhvv/02y7b2d2Rc/weu/URMrkyWnVu7du1y+CK63v/+9z/jy/eOO+5werKnzKxatUqHDx/O9PVp06YZx9+uXTunJqiSUqv/DRs2lJR69cS0adOybP/+++8bw3PVrFlTtWrVcnjdW+8HAMD7nM1Prjdr1iyHuxXsXbt2zeG7qWfPntluLzf5iaecOHFCu3fv1uXLl03Lg8wal3jBggVZTt44ZcoUY7ljx45Ob7dJkyaqWLGipNSrMT///PMs27/99tvGcsuWLdMNd0DOAQCw52weUrNmTd18882SUs9LfP3115m2PXLkiH7++WdJUokSJRwKF9czM//ITW7UrVs3Y3n69OlZnmTfs2ePQ/GhU6dO6dp8/PHHxoWtFStW1FNPPSVJ6tGjh5o0aSIptR9fffXVbI7KOZz7AFxH8QGms6/sps2JkJGIiAhj+fvvv3e4EyJNfHy8Xn/9dc2ZM8fheWfmf0jbd1BQkFFdd1aPHj2M5S+++EJfffVVhkM9rVq1SmvWrJGUOobjnXfe6fC6/e2MaXNbeMpTTz2V4V0bX375pfGHeXBwsMMVATmVmJioxx9/PN0tiMnJyZo2bZq+/PJLSanjGEZGRrq07eHDhxtV+//973+aNm1auuJTUlKSpkyZos8++0xSapX/hRdeSLctb74fAADvcjY/ud6xY8c0fPjwdH8cx8TEaMiQIfrrr78kSTfffLPat2+
f5bauXr1qzJNQpkyZPHuLvH0e9P7772vjxo0Ztvv000+Nq/vKli2b7qo7++9d+5MeuRETE6NHH3003VWjCQkJGjNmjDZt2iQpdZiDAQMGOL3doKAgDRkyxHj85ptvasGCBenyv6tXr+qll14yhtosWLCgnn766XTbI+cAANhzJQ+x/z4aP358hncInD59WsOHDzdOLD/44INZzmeVm/Mj18tNbtS1a1dVrVpVUur34yOPPJLhME7bt2/X448/bvzt36VLl3R3GR46dEhTp041Ho8bN86YWNtisejVV181+uS7775z6k6L7HDuA3AdE07DdE2bNjWW7YdKul7nzp01ZcoURUVFKSEhQX379lWbNm1Uo0YNWSwWHT9+XBs2bDDGJgwODlZiYqKk7CcnOn36tHHLfd26dR3GV3RGRESEevXqpYULF8pms+nFF1/U3Llzddttt6l8+fKKj4/Xrl279MsvvxjrREZGqlSpUg7bsb+d8ZNPPpHValXRokXVuHFjh9vqzBYcHKyLFy+qX79+atWqlerVqyer1apNmzYZJwkkaeTIkapZs6Yp+ztx4oS6dOmiu+++W7Vq1dKVK1e0YcMGY9zJoKAgjRs3zuXJrW+//XZFRkYaScXUqVO1ZMkStW7dWqVLl9aFCxe0YcMGh5MaQ4YM0R133JFuW/bvx7fffquiRYuqXLlyqlmzptq0aZOTQwcA+Ahn8xN7hQoVUtGiRfXjjz+qbdu2at++vW644QadPn1aq1atMvKRsLAwvfvuu9kOufT7778bJwnSrsbLi9q2bauWLVvqp59+ktVq1aOPPqqGDRuqbt26KlOmjC5duqRt27YZ/WixWDR69Oh0Jz3sv3cnTpyoU6dOKTg4WO3atctx/hEcHKx9+/apQ4cOat++vapWraqoqCitWbNGp0+flpQ6vvQbb7yhggULurTt+++/Xzt27NDixYuVlJSksWPHat68eWrZsqWKFy+uf/75R+vXrzdyTIvFojFjxig8PDzdtryVAwIA8iZX8pB27dqpT58++uKLL3Tt2jUNHTpUDRs2VNOmTVWoUCEdOXJEa9asMS7KbNiwoQYPHpzp9nJ7fsRebnOjkJAQvf/+++rXr59iY2O1a9cudejQQa1bt1aNGjWUlJSkXbt26eeffzYuAKhZs6bGjh3rsJ2kpCQ999xzxoga9957b7qLQatVq6bHH3/cuPhy7NixatSokUqWLJnj4+fcB+A6ig8wXbVq1VS9enUdOXJEu3btUnx8vFF9tle0aFFNmzZNTzzxhGJiYpScnKw1a9YYdxLYu+eee9S1a1c9+eSTklL/eM9K2lVvaevmxJgxY2S1WrVkyRJJqZP+ZDTxT3BwsIYMGaL//ve/6V674447VLlyZR0/flwJCQn6+OOPJUkDBgxw6x+eAwYM0IEDB7RlyxZt2LBBGzZscHg9JCREr732mrp27WrK/oYPH67ly5dr//79WrlypVauXOnwetGiRTV58uQczy0RGRmpEiVKaOLEiYqPj9fJkyeNSr+90NBQjRs3Tl26dMlwO+Hh4WrYsKF27dolm81mDKnQpk0bvoABwM85m5/YK1CggD744AM99thjOn/+vBYuXJiuTdWqVTVjxgxVr1492xjMyE885b333tPw4cONmHft2qVdu3ala1e4cGE9//zz6ty5c7rXOnTooKlTp+rSpUuKiYnRzJkzjXVyWnwYM2aMPvzwQ506dcrI0eyVK1dOM2fOVJ06dXK0/TfeeEPly5fX7NmzlZiYqEOHDjlMYJ2mVKlSevvtt9WiRYsMt+OtHBAAkDe5moeMHTtWYWFhmj17tnFCPqPv4bvuuksTJ07M8q4HM/MPM3KjiIgILVy4UMOHD9fhw4d19epVrVixIsO29913n8aNG5euYPLhhx8a54XCwsL04osvZrj+f//7X61YsUKHDx9WdHS0xo0b5zBEo6s49wG4juID3KJLly6aNGmSEhMTtWnTJrVr1y7Ddrf
eequ+/fZbzZ07Vz/++KNOnDghq9WqwoULq0KFCqpbt666dOmixo0bKyEhQcWKFdPly5e1bds2/fPPP6pQoUKG200bHiAgIED/+c9/cnQMwcHBevPNN3X//ffrm2++0e7du3Xq1CkjjgoVKqhly5bq3r27KleunOE2ChYsqHnz5mnSpEnavHmzYmJiVKBAASUkJOQoJmeFhobqk08+0eLFi/XVV1/p0KFDSkhIUOXKlXXnnXdqwIABmfZdTpQqVUpffvml5s+fr2XLlunYsWOy2WyqVq2a2rZtq759++bq6gJJ6tevn9q3b68FCxZo06ZN+vvvv3X58mWFhoaqevXquuuuu/TAAw9kuR+LxaLZs2frvffeM65ctL+jBgDg35zNT+zVqVNHy5Yt00cffaR169bp9OnTKlSokGrUqKFOnTqpZ8+eWf7Bby8tPylatGie/8OvSJEi+vjjj7Vx40Z999132rt3r86ePSur1aoSJUqoUqVKatWqlbp3766yZctmuI0yZcroiy++0Hvvvaft27fr8uXLKly4cLaTKGalatWqWrZsmT799FN9//33OnHihAICAhQeHq4OHTrogQceyNUVnRaLRcOHD1f37t21cOFCbdmyRadOndKVK1dUtGhRRUREqG3bturevbvDkAbX81YOCADIu1zJQywWi0aMGKEuXbpowYIF2rJli06fPq1r166pZMmSuuWWW9SjRw+1atUq2/2acX7Enhm5UY0aNbRs2TKtWLFCa9as0d69e405nSpUqKDGjRure/fuxjwI9g4cOGBc0CBJzz//fKbnAUJCQvTqq6+qX79+stlsWrVqlb777jvde++9OTp2zn0ArrPYMhrIHsilCxcuqHXr1rJarWrfvn2uKsuuunz5spo3by6r1aq7775b06dP99i+vWnx4sV6/vnnJaVWy4cOHerW/U2dOtWYBGnChAnq3r27W/cHAEBueTM/OXLkiDFR4oMPPpjh+LzI2OjRo427HObOneswdAUAAL7CG3lIfj0/YibOfQC5w4TTcIvSpUvrvvvukyRt2LBB0dHRHtv3t99+a4z79/DDD3tsvwAAIG/zZn7y9ddfS0odB/jBBx/02H4BAEDe4I08hPMjALyN4gPc5vHHH1dwcLCsVmuG4wC6g81m07x58yRJzZs312233eaR/QIAAN/gjfzk6tWr+uqrryRJ3bt3V8WKFT2yXwAAkLd4Mg/h/AiAvIDiA9ymUqVK6t27tyTp888/17Vr19y+z/Xr1+vo0aOyWCwaOXKk2/cHAAB8izfyk0WLFunSpUsqVKiQIiMj3b4/AACQN3kyD+H8CIC8gOID3Gr48OEqU6aMzp8/r88//9yt+7LZbMaYib169VLdunXduj8AAOCbPJmfXLt2TbNmzZIkDRkyROXKlXPr/gAAQN7miTyE8yMA8gqKD3CrokWL6uWXX5YkzZo1SxcvXnTbvr755hsdOHBAN9xwg55++mm37QcAAPg2T+YnH3/8sc6fP686deow1jIAAPBIHsL5EQB5BcUHuF27du3Uu3dvxcTEaPLkyW7ZR2xsrCZOnKjAwEBNnDhRRYsWdct+AACAf/BEfvLPP/9o9uzZCg0N1aRJkxQUFOSW/QAAAN/izjyE8yMA8hKLzWazeTsIAAAAAAAAAADgP7jzAQAAAAAAAAAAmIriAwAAAAAAAAAAMBXFBwAAAAAAAAAAYCqKDwAAAAAAAAAAwFQUHwAAAAAAAAAAgKkoPgAAAAAAAAAAAFNRfAAAAAAAAAAAAKYK8tSOrFarYmJijMcFChRQYGCgp3YPAMhHkpOTlZCQYDwOCwtTSEiIFyNCXkAuAgDwBPIQZIZcBADgCXkpF/FY8SEmJkYnTpzw1O4AAHBQtmxZb4eQ712+fFmdO3fWuXPn1K1bN7355pse3T+5CADAW8hDIJGLAAC8x1u5CMMuAQAAj3jjjTd07tw5b4cBAAAAAAA8gOIDAABwux9++EFLlizxdhg
AAAAAAMBDPDbsUoECBRweV6pUSaGhoZ7afZ51+PBhJScnKzAwUDVq1PB2OF5HfziiPxzRH47oD0f2/VGhQgWHW9qv/w6CZ12+fFljxozxdhjkIhng94gj+sMR/ZEefeKI/nCU1h8BAQFKSUkxnicPQRpykfT4PeKI/nBEfziiPxzRH+nlxVzEY8WH6ydRCg0NVZEiRTy1+zwrICDA+FDQH/TH9egPR/SHI/rDkX1/XP9HHBP5eVfacEvFihXT5cuXvRYHuUh6/B5xRH84oj/So08c0R+O0vrDYrE4PE8egjTkIunxe8QR/eGI/nBEfziiP9LLi7kIwy4BAAC3sR9u6dlnn/VyNAAAAAAAwFMoPgAAALewH26pV69euv32270cEQAAAAAA8BSKDwAAwC3Shlu64YYbuOsBAAAAAIB8huIDAAAwnf1wS6+88gpjcAIAAAAAkM9QfAAAAKayH26pa9euatWqlZcjAgAAAAAAnkbxAQAAmCptuKUyZcrohRde8HY4AAAAAADAC4K8HQAAAPAf9sMtjRs3TsWLF/dyRJk7fPiwAgLy93UYiYmJxv979uzxcjTmqlGjhoKCXEt1IyIijGWr1Wp2SHlCUlKSDh8+7FRbf/585BR94oj+cJTWH8nJyV6OBAAAIG+g+AAAAExhP9xSp06ddPfdd3s5oqwlJydzgshO2kkzfxEUFKSQkBBvh5En5eS99rfPhxnoE0f0BwAAAK5H8QEAAJgibbilEiVKGEWIvCwwMJA7H+xOFgYHB3sxEvdJSbHpclyCU21tdssW94TjNcUKF1BAQOpROfte54fPh6voE0f0hyMKMAAAAI4oPgAAgFyzH25pzJgxKlmypJcjyl6NGjVUpEgRb4fhVXv27FFiYqKCg4NVv359b4fjFlfirXr7sx1OtY2Ni5UtxSZLgEVFCvvXZ+O5gY1UvEgBhYSEOP1e54fPh6voE0f0h6O0/ggMDOTOOgAAAFF8AAAAJvj++++N5ZEjR2rkyJFZtl+yZIlRrIiMjNTQoUPdGh8AAAAAAPCs/D3WAAAAAAAAAAAAMB13PgAAgFwbMGBAthNMR0VF6eWXX5YkNW3aVAMHDpQkVatWze3xAQAAAAAAz6L4AAAAcq1OnTqqU6dOlm1OnjxpLFeoUCHbYgUAAAAAAPBdDLsEAAAAAAAAAABMRfEBAAAAAAAAAACYimGXAAAAAAAAAD+RkpKiK1eueDuMXIuLi1NiYqKCg4N16dIlb4fjdfb9kZKSooAArilH3kfxAQAAAAAAAPATV65cUf/+/b0dBtxo3rx5Kl68uLfDALJF8QEAAHjEjTfeqIMHD3o7DAAAAAAA4AHcnwMAAAAAAAAAAExF8QEAAAAAAAAAAJiKYZcAAAAAAAAAPxZ6U0dZAgvkeH1bUoLij6103Ga1jrIE5XybcI4tOUHxR1dm3xDIgyg+AAAAAAAAAH7MElhAAUEFc7x+SkbbDMrdNuGcjPoe8BUMuwQAAAAAAAAAAExF8QEAAAAAAAAAAJiK4gMAAAAAAAAAADAVxQcAAAAAAAAAAGAqig8AAAAAAAAAAMBUFB8AAAAAAAAAAICpKD4AAAAAAAAAAABTUXwAAAAAAAAAAACmovgAAAAAAAAAAABMRfEBAAAAAAAAAACYiuIDAAAAAAAAAAAwFcUHAAAAAAAAAABgKooPAAAAAAAAAADAVBQfAAAAAAAAAACAqSg+AAAAAAAAAAAAU1F8AAAAAAAAAAAApqL4AAAAAAAAAAAATEXxAQAAAAAAAAAAmIriAwAAAAAAAAAAMBXFBwAAAAAAAAAAYCqKDwAAAAAAAAAAwFQUHwAAAAAAAAAAgKkoPgAAAAAAAAAAAFNRfAAAAAAAAAAAAKai+AAAAAAAAAAAAExF8QEAAAAAAAAAAJiK4gMAAAAAAAAAADAVxQcAAAAAAAAAAGCqIG8HAAAA/M+FCxf02WefaePGjfr7778lSeXLl1eLFi3Uq1cv1ahRw8s
RAgAAAAAAd6L4AAAATLV582aNHDlSMTExDs8fPXpUR48e1RdffKHIyEg9/vjj3gkQAAAAAAC4HcUHAABgmgMHDuiJJ55QQkKCJOmuu+5SixYtVKRIEe3fv18LFy7UtWvXNHnyZBUpUkT9+/f3csQAAAAAAMAdKD4AAADTvPbaa0bhYdy4cerTp4/xWrdu3dS3b1/17NlTly9f1uTJk9W1a1cVKVLEW+ECAAAAAAA3YcJpAABgisOHD2v79u2SpBYtWjgUHtJUrVpVjzzyiCQpNjZWP/74o0djBAAAAAAAnkHxAQAAmCImJkbNmjVT6dKl1aFDh0zbRUREGMunTp3yRGgAAAAAAMDDGHYJAACYolGjRpozZ0627c6cOWMslylTxp0hAQAAAAAAL+HOBwAA4DExMTH65JNPJEmFChXSnXfe6eWIAAAAAACAO3DnAwAAcKuEhASdOnVK69at09y5c3Xu3DlJ0ujRo1WyZEkvRwcAAAAAANyB4gMAAHCbffv2qUePHg7PlSlTRi+88II6derkpagAAAAAAIC7UXwAAABuYz+/Q5qYmBitXLlStWvXVtWqVT0f1P87fPiwAgLy9wiUiYmJxv979uzxcjTmioiIUEhIiGw2m2LjYp1ax5ZiM/53dh1fYbOlHpvVatWBAwecWsefPx85RZ84oj8cpfVHcnKylyMBAADIGyg+AAAAtylRooTGjBmjkiVL6syZM/rmm2904MABrV69Wlu3btWcOXNUp04dr8SWnJzMCSI7aSfN/I1N/xYVXFovB+vkZfZHk5P32l8/H7lBnziiPwAAAHA9ig8AAMBtbrvtNt12223G40GDBmncuHFauHChrly5oqefflrfffedAgMDPR5bYGAgdz7YnSwMDg72YiTuY5FkCbA41da+4ODsOr7C/micfa/zw+fDVfSJI/rDEQUYAAAARxQfAACAxwQEBGjs2LHavXu3Dh48qKNHj2rTpk1q1aqVx2OpUaOGihQp4vH95iV79uxRYmKigoODVb9+fW+H4xYWi0VFCjv3PsfGxcqWYpMlwPl1fIXFklp+CAkJcfq9zg+fD1fRJ47oD0dp/REYGMiddchXNm3apGnTpkmSIiMj1aJFCy9HBAC+y99+p+bvy/0AAIDHBQYG6v777zce79y504vRAAAAIKeSk5P14YcfKi4uTnFxcfrwww8pvgFADvnj71SKDwAAwOOqVatmLEdHR3sxEgAAAORUbGysYmJijMcxMTGKjY31XkAA4MP88Xcqwy4BAABTLFmyRBs2bNCJEyc0Z84cFStWLNO2CQkJxnLhwoU9ER4AAAAAAPAg7nwAAACm+PPPP7Vq1Sr98ccf2rp1a5Zt9+zZYyzfdNNN7g4NAAAAAAB4GMUHAABgipYtWxrL8+fPz7RdTEyMFi1aJEkKDg72ymTTAAAAAADAvSg+AAAAUzRr1kwRERGSpJ9//lkff/xxujaxsbEaOnSoMc/DAw88oHLlynk0TgAAAAAA4H7M+QAAgA/atm2bJKly5coun7w/cuSINm3apLi4OA0ZMsS0mAICAvTGG2+oX79+unr1qt5++21t2bJF7dq1U9GiRXXo0CEtWrRI58+flyTVrl1bzzzzjGn7BwAAAAAAeQfFBwAAfNCAAQNksVj07LPP6qGHHnJp3VWrVmnKlCkqXry4qcUHSapTp45mzZqlESNGKCoqSps2bdKmTZvStWvZsqUmTZqkQoUKmbp/AAAAAACQN1B8AAAgn0lMTJQkxcXFuWX7TZo00cqVK/X5559r/fr1OnbsmBISElSqVCk1bNhQ3bp1Y54HAAAAAAD8HMUHAADyqKSkJO3atSvLNsePHzeGYMpOcnKyTp06ZUwGXbx48VzHmJm0uyrMvrMCAAAAAAD4BooPAADkUUFBQfr000+1fv36DF+32WxasGCBFixY4PK2LRaLbrvtttyGCAAAAAAAkKEAbwcAAAAyN2bMGBUsWFA2m83hX5rrn3f2X7F
ixTRixAjvHRgAAAAAAPBr3PkAAEAeVr58eY0fP14//fSTw/NLliyRxWJRnTp1VLNmTae2FRgYqCJFiujGG29Uhw4dVLp0aXeEDAAAAAAAQPEBAIC8rlOnTurUqZPDc0uWLJEkde7cWQ899JA3wgIAAAAAAMgUxQcAAHxQ48aNJaXeGQEAAAAAAJDXUHwAAMAHffbZZ94OAQAAAAAAIFNMOA0AAAAAAAAAAEzFnQ8AAPi4xMRE7dmzRxcuXFBCQoJSUlKcXrdr167uCwwAAAAAAORbFB8AAPBRSUlJmjp1qubNm6f4+HiX17dYLBQfAAAAAACAW1B8AADARw0bNkwbNmyQzWbzdigAAAAAAAAOKD4AAOCD1q5dq/Xr18tisUiSKleurNtuu00lS5ZUoUKFvBwdAAAAAADI7yg+AADgg5YuXWosP/HEExo2bJhRiAAAAAAAAPA2ig8AAPig3377TRaLRTfffLOGDx/u7XAAAAAAAAAcBHg7AAAA4LqYmBhJUsuWLb0bCAAAAAAAQAYoPgAA4INKlCghSSpYsKCXIwEAAAAAAEiP4gMAAD4oPDxcknT48GEvRwIAAAAAAJAexQcAAHzQvffeK5vNpvXr1+v8+fPeDgcAAAAAAMABxQcAAHxQly5d1KhRI129elUjRozQlStXvB0SAAAAAACAIcjbAQAAANdZLBZNmzZNQ4YM0Y4dO3T33XerY8eOatCggUqVKqVChQo5tZ3GjRu7OVIAAAAAAJAfUXwAAMAHNWjQQJKUkpIii8WiS5cuaeHChVq4cKHT27BYLPrjjz/cFSIAAAAAAMjHKD4AAOCDEhIS0j1ns9m8EAkAAAAAAEB6FB8AAPBB3bp183YIAAAAAAAAmaL4AACAD5owYYK3QwAAAAAAAMhUgLcDAAAAAAAAAAAA/oXiAwAAAAAAAAAAMBXFBwAAAAAAAAAAYCrmfAAAwAcNHDgw19uwWCyaM2eOCdEAAAAAAAA4ovgAAIAP+vXXX2WxWHK0rs1mk6Qcrw8AAAAAAJAdig8AAPiotCKCKywWi6pUqaJChQq5ISIAAAAAAIBUFB8AAPBB69aty7aNzWZTQkKCzp8/r7179+rzzz/XmTNnVLhwYc2ePVulSpXyQKQAAAAAACA/ovhggqhLV7Xr4HnFJyQqtECwGtYqo1LFuaLUVVGXrmrn4SuKu5aowgWDVbHK1Vz1Y07fl6zW89f32heOy+zPh6/z5/7whc9jXlCxYkWn21avXl3NmjVTv3799Nhjj2n79u0aNmyY5s2bx9BLAAAAAADALSg+5MKhExe1aN0h/fr7GSWn/Dv0RWCARU3qlFfPtjVVs1IJL0boGzLrx6Vb1+SoH3P6vmS1Xu1qJSVJfxyL9qv32hc+w2Z/PnydP/eHL3wefV1oaKjee+89tWnTRjt37tQ333yjrl27um1/ly5d0oIFC7RhwwYdO3ZMcXFxKlq0qGrVqqUOHTqoe/fuCgkJcdv+AQAAAACA91B8yKGte//RO/N2KDEpJd1rySk2bd17Wtv3n9Uz/Rvp9no3eCFC32B2P+Z0e9mtt/dIVIb78+X32hc+w74Qoyf5c3/487HlNaVKldLdd9+t5cuXa+nSpW4rPmzdulUjR45UdHS0w/PR0dHaunWrtm7dqnnz5mnmzJmqVKmSW2IAAAAAAADeE+DtAHzRoRMXMz1JZi8xKUXvzNuuQycueigy32J2P+Z0e86uZ0aMeYUvfIZ9IUZP8uf+8Odjy6tuuukmSdLhw4fdsv0DBw7oiSeeMAoPLVq00NixYzV58mQ988wzqlmzpiTp0KFDGjx4sC5fvuyWOAAAAAAAgPdw50MOLFp3yOkT1YlJKVq07pBeGNTEzVH5HrP7Mafbc2W93MaYV/jCZ9gXYvQkf+4Pfz62vCo2NlaSdOXKFbds//XXX9fVq1clSePGjVOfPn0cXh80aJBGjx6tb7/9Vn///bdmzJih0aNHuyUWAAAAeJYrF5bExcU
pMTFRwcHBunTpkhuj8g1m9QcX9/g/3mP//P3hj+8rxQcXRV26ql9+P+PSOr/+fkZRl/xnMlgzmN2POd3e4RMXXV4vu23m9ffaFz7DvhCjJ/lzf/jzseVVKSkpWr9+vaTUIZjMduTIEW3btk2SdPfdd6crPEhSUFCQxo8fr19++UXnzp3T4sWL9cwzzygwMND0eAAAAOBZQ4YM8XYIgN/j5wy+gmGXXLTr4Hml2E2E6ozkFJt2/3neTRH5JrP7MafbW7HlL5fXy26bef299oXPsC/E6En+3B/+fGx5UVxcnJ5//nn9/fffslgsatq0qen72Lp1q7HcpUuXTNsVKFBArVu3lpQ6MfVff/1leiwAAAAAAMB7uPPBRfEJiTlb71qSyZH4NrP7Mafbi7uas/Wyktffa1/4DPtCjJ7kz/3hz8fmbs8//7zTba1Wq6Kjo/Xbb78ZwyFJUu/evU2PKyAgQDVr1tTp06dVtWrVLNsWL17cWPbH20sBAAAAAMjPKD64KLRAcM7WK0hX2zO7H3O6vcKFcrZeVvL6e+0Ln2FfiNGT/Lk//PnY3G3JkiWyWCwur2ezpd5pMmjQIDVo0MDssNS3b1/17dvXqbb2E16HhYWZHgsAAAAAAPAezt64qGGtMgoMsCjZhWFCAgMsuiW8jBuj8j1m92NOt9fpjqpav/2ES+vlNMa8whc+w74Qoyf5c3/487F5QlohwRU1a9bUwIED1bNnTzdE5LyzZ8/qp59+kiSVKFFCVapU8Wo8AAAAMMeMGTNUrFgxp9r+8ccfxoSxtWvXdnNkeZ9Z/XH58mXmBPBzrvyc+St//P3hjz+7FB9cVKp4ITWpU15b9552ep0mdcozMep1zO7HnG6vRqUSLq+X0xjzCl/4DPtCjJ7kz/3hz8fmbnPnznW6bUBAgAoVKqQbbrhBJUuWdGNUznvrrbeUmJg67Fbnzp0VEMA0VAAAAP6gWLFiDsNrZqVw4cLGyUNn1/Fn9Aec5crPmb/i58U3UHzIgZ5ta2r7/rNKTErJtm1IUIB6tq3pgah8j9n9mNPtubJebmPMK3zhM+wLMXqSP/eHPx+bOzVp0sTbIeTYggULtHz5cklSaGioHn30US9HBAAAAAAAzEbxIQdqViqhZ/o30jvztmd5siw4KEBP92+kmpVKeDA632F2P+Z0e86ulxVfe6994TPsCzF6kj/3hz8fG9Jbu3atXn31VePxuHHjVK5cOa/Ecvjw4Xx/x0Xa3SeJiYnas2ePl6MxV0REhEJCQmSz2RQbF+vUOrb/HwLOluL8Or4ibZg2q9WqAwcOOLWOP38+cspf+6RGjRoKCnL9T8OIiAhj2Wq1mhlSnpCUlOQwP1F20j4fycnJ7goJAADAp1B8yKHb692gtyJbaNG6Q/r19zMO45UHBljUpE559Wxbk5Nk2TC7H3O6vezWq31TKclm0x/Hov3mvfaFz7AvxOhJ/twf/nxsnnb8+HGdOHFCly5dkpR6O26lSpXyxJwKq1ev1siRI42TMgMHDlSXLl28Fk9ycjIniOyknTTzNzb9W1RwaT2T5oPKK+yPJifvtb9+PnLDn/okKChIISEh3g4jT/Kn9xkAAMDTKD7kQs1KJfTCoCaKunRVu/88r/hrSQotGKRbwsswHrkL7Pvx23U7jX78T9tbc9SPOX1fnFnP395rX/gMm/358HX+3B++8HnMq2JiYvTpp5/q66+/1oULFzJsU6JECXXq1ElPPPGESpUq5eEIpUWLFmncuHFKSkqSJHXr1k0vvPCCx+OwFxgYyJ0PdifVgoODvRiJ+1gkWQIsTrW1Lzg4u46vsD8aZ9/r/PD5cJW/90lKik2X4xKcbm9f1PKnn5hihQso4P9/B7jyPlOoAAAAcETxwQSlihdS28aVvR2GzytVvJBurVHUmCwmtycbc/q+ZLWev77XvnBcZn8+fJ0/94cvfB7zkm3btumpp55SVFSUpH+HVrledHS0Pv/8c33//fd6++23dcc
dd3gsxmnTpmnq1KnG4x49euj111+XxeLdU1U1atRQkSJFvBqDt+3Zs8f4PVK/fn1vh+MWFotFRQo79z7HxsXKlmKTJcD5dXxF2s9bSEiI0+91fvh8uMrf++RKvFVvf7bD6fb++jPz3MBGKl6kgEs/L9K/n4/AwEDurAMAABDFBwAAfNaePXv0yCOPyGq1GkWHsLAwRUREKCwsTCkpKbp48aIOHjyoy5cvy2az6cKFC3r88ce1cOFC3XzzzW6NLyUlRePGjdPChQuN5wYOHKgXXnjB64UHAAAAAADgXhQfAADwQYmJiRo5cqQSElKHx6hdu7aeeeYZ3X777Rm237x5syZNmqTff/9dVqtVI0aM0Hfffee2YUNSUlL07LPP6ttvvzWeGz58uIYMGeKW/QEAAAAAgLwlfw90DACAj1q6dKlOnjwpi8WiVq1aaeHChZkWHiSpefPmWrhwoe666y5JqRNTf/fdd26Lb9y4cUbhISAgQOPGjaPwAAAAAABAPkLxAQAAH7Ru3TpJUtGiRfXWW285dQdDUFCQ3nrrLRUrVkyStGrVKrfEtnjxYmOopYCAAE2YMEF9+vRxy74AAAAAAEDeRPEBAAAftH//flksFrVt21ZhYWFOr1e8eHG1bdtWNptN+/btMz2uCxcuaPz48cbj5557Tl27djV9PwAAAAAAIG9jzgcAAHzQxYsXJUlVqlRxed20dWJiYswMSZI0Z84cxcbGSpIqVKigG2+8UWvXrs12vdq1a6tChQqmxwMAAAAAALyD4gMAAD6oQIECSkxMVHx8vMvrpq0TGhpqdlhasmSJsfzPP//oySefdGq9CRMmqHv37qbHAwAAAAAAvINhlwAA8EEVKlSQzWbTL7/84vK6aeuUL1/e1Jiio6N1/vx5U7cJAAAAAAB8E3c+AADgg5o2baqDBw9qz549Wr9+vdq0aePUeuvWrdNvv/0mi8Wipk2bmhpTyZIldfDgQVO3CQAAAAAAfBN3PgAA4IN69eoli8UiSXrmmWf0ww8/ZLvOhg0b9Oyzz0qSLBaLevbs6c4QAQAAAABAPsadDwAA+KDq1aurT58+mj9/vuLj4/XEE0+ocePGat++vSIiIhQWFiYpdWLqgwcP6vvvv9f27dtls9lksVh0//33Kzw83LsHAQAAAAAA/BbFBwAAfNTo0aN16tQpbdy4URaLRdu2bdO2bdsybW+z2SRJd9xxh15++WVPhQkAAAAAAPIhhl0CAMBHhYSEaMaMGXryyScVGhoqm82W5b/Q0FANGTJEs2fPVlAQ1x8AAAAAAAD34cwDAAA+LDAwUEOHDtWAAQO0YcMG/fzzzzp58qRiYmJks9lUrFgxVa1aVbfddpvat2+vYsWKeTtkAAAAAACQD1B8AADAD4SFhalbt27q1q2bt0MBAAAAAABg2CUAAHzR8ePHnWr3wQcfaNWqVbJarW6OCAAAAAAA4F/c+QAAgA9ZunSppk2bpuDgYK1cuTLLtomJifrggw+UkJCgcuXKaejQoerRo4eHIgUAAAAAAPkZxQcAAHzA1atXNWrUKG3YsEE2m00Wi0XR0dEqWbJkpuvs3r1b165dk8Vi0ZkzZ/TSSy/pxx9/1DvvvKOQkBAPRg8AAAAAAPIbhl0CACCPS05O1pAhQ7RhwwaH5//8888s1ytQoIBatWqlwMBASZLNZtPq1av11FNPyWazuS1eAAAAAAAAig8AAORxM2fO1NatW43H3bt31+rVq9WsWbMs16tfv74+/PBDrVu3zpiI2mazaf369ZozZ45bYwYAAAAAAPkbxQcAAPKw6OhoffTRR5KkwMBAvf/++3rjjTdUqVIlp7dRrlw5TZgwQa+++qosFotsNpumTp2quLg4d4UNAAAAAADyOYoPAADkYcuWLTPmbXj66afVrl27HG/rgQceUN++fSVJ8fHxWrZsmVlhAgAAAAAAOKD4AABAHvbzzz9LkkqVKqUBAwbkentDhw5VwYIFJUmbN2/O9fY
AAAAAAAAyQvEBAIA87ODBg7JYLLrjjjuMiaNzIywsTM2aNZPNZtMff/xhQoQAAAAAAADpUXwAACAPi4mJkSRVqVLFtG3WqlVLknTx4kXTtgkAAAAAAGCP4gMAAHlYUlKSJCk4ONi0bRYuXFiSlJiYaNo2AQAAAAAA7FF8AAAgDytWrJgkc+9SuHz5sqR/ixAAAAAAAABmo/gAAEAeVq1aNdPnZ9i3b58kqXz58qZtEwAAAAAAwB7FBwAA8rCGDRtKknbs2KGoqKhcb+/cuXPavn27LBaLIiIicr09AAAAAACAjFB8AAAgD2vXrp0kKTk5WdOmTcv19qZNm2bMI9GqVatcbw8AAAAAACAjFB8AAMjD6tWrp1tuuUU2m00LFizQt99+m+NtffPNN/ryyy9lsVhUqlQptW3b1sRIAQAAAAAA/kXxAQCAPO7ZZ5+VxWKRJD333HOaMmWKrFar0+tfu3ZNkydP1gsvvGA8N3z4cBUoUMD0WAEAAAAAACQpyNsBAACArN1666164oknNGPGDEnSzJkztXDhQnXq1EnNmjXTrbfeqhIlShjtbTaboqKitGPHDm3evFlr1qxRTEyMbDabJOnee+9Vz549vXIsAAAAAAAgf6D4AACADxg2bJiuXr2q//3vf7JYLIqKitK8efM0b948SVJwcLDCwsJktVp1+fJlo9AgyWG5Z8+eeuWVVzwePwAAAAAAyF8oPgAA4COee+45NWjQQG+88YbOnTsnm80mi8Uim80mq9Wqc+fOZbpu5cqV9fTTTxsTWAMAAAAAALgTxQcAAHxIhw4d1KZNGy1dulTffvutdu3apaSkpAzbFi9eXE2bNtV9992n1q1bKzAw0MPRAgAAAACA/IriAwAAPiYkJEQPPPCAHnjgASUkJOjIkSM6ffq04uPjFRgYqKJFi6pKlSqqVKmSMVE1AAAAAACAJ1F8AADAhxUoUEC1a9dW7dq1vR0KAAAAAACAIcDbAQAAAAAAAMD3FClSRGFhYcbjsLAwFSlSxHsBAYAP88ffqRQfAAAAAAAA4LLAwEA99thjKly4sAoXLqzHHnuMecYAIIf88Xcqwy4BAAAAAAAgR1q0aKEWLVp4OwwA8Av+9juVOx8AAIBHvPLKK6pVq5amTp3q7VAAAAAAAICbUXwAAABut3XrVi1YsMDbYQAAAAAAAA+h+AAAANxq3759ioyMVEpKirdDAQAAAAAAHkLxAQAAuM3GjRv10EMPKTY21tuhAAAAAAAAD2LCaQAAYDqr1aoPPvhAM2fO5I4HAAAAAADyIe58AAAAptqyZYs6duyo6dOnKyUlRaGhoXrooYe8HRYAAAAAAPAgig8AAMBUy5Yt08mTJyVJdevW1aJFi9S6dWsvRwUAAAAAADyJYZcAAIDpSpYsqcjISPXu3VuBgYGKiorydkgAAAAAAMCDKD4AAABT9evXT+PGjVPBggW9HQoAAAAAAPASig8AAMBU9erV83YIAAAAAADAyyg+AACAfOnw4cMKCPCf6a9q1KihoCDXUruIiAhj2Wq1mh2SVwUHB8tisUiSYuNinVrHlmIz/nd2HV9RuFCwJMlmsykxMdGpdfz58xEUFCSLxSKbzaakpCSn1/PXPsnJz4vkvz8zNlvqcVmtVh04cMDp9dJ+tpKTk90SFwAAgK+h+AAAAPKl5ORkvzpBFBQUpJCQEG+HkSelnSB19zp5WcD/n1i2WCx8TuzQH+nl9LPvTz8z9kfibLEOAAAA6VF8AAAA+VJgYKBf3fmQJiXFpstxCU61tT/BZnFPOF5TvEgB40puS4BzR2d/8tTZdXwNn49UaZ8PV/pD8t8+ycnPi+S/PzP2RxIcHOz0ehQqAAAAHFF8AAAA+VKNGjVUpEgRb4dhuivxVr392Q6n2sbGxcqWYpMlwKIihf2rL15//A79/7lUp4/Nn/sjDZ+PVGmfD1f6Q/LfPsnJz4vkv/2RVogJCQlR/fr1nV5vz549SkxMVGBgoF/dWQc
AAJBT/ne5HwAAAAAAAAAA8CqKDwAAAAAAAAAAwFQUHwAAAAAAAAAAgKkoPgAAAAAAAAAAAFNRfAAAAAAAAAAAAKai+AAAAAAAAAAAAExF8QEAAAAAAAAAAJiK4gMAAAAAAAAAADAVxQcAAAAAAAAAAGCqIG8HAAAA/F/Tpk118OBBb4cBAAAAAAA8hDsfAAAAAAAAAACAqSg+AAAAAAAAAAAAU1F8AAAAAAAAAAAApqL4AAAAAAAAAAAATEXxAQAAAAAAAAAAmIriAwAAAAAAAAAAMBXFBwAAAAAAAAAAYCqKDwAAAAAAAAAAwFQUHwAAAAAAAAAAgKkoPgAAAAAAAAAAAFNRfAAAAAAAAAAAAKai+AAAAAAAAAAAAExF8QEAAAAAAAAAAJiK4gMAAAAAAAAAADAVxQcAAAAAAAAAAGAqig8AAAAAAAAAAMBUQd4OAAAAAAAAAID72JITlJKb9ZMSMnwuN9uEc2zJ6fse8BUUHwAAAAAAAAA/Fn90pfnbPGb+NgH4F4ZdAgAAAAAAAAAApqL4AAAAAAAAAAAATEXxAQAAAAAAAAAAmIo5HwAAAAAAAAA/UbRoUc2bN8/bYeTaH3/8ocTERAUHB6t27dreDsfr7PujaNGi3g4HcArFBwAAAAAAAMBPBAQEqHjx4t4OI9cKFy5snGz3h+PJLfv+CAhgMBv4Bj6pAAAAAAAAAADAVBQfAAAAAAAAAACAqSg+AAAAAAAAAAAAU1F8AAAAAAAAAAAApqL4AAAAAAAAAAAATEXxAQAAAAAAAAAAmIriAwAAAAAAAAAAMBXFBwAAAAAAAAAAYKogbwcAAAD8j81m03fffaevv/5a+/fvV3x8vMqUKaPGjRurX79+ql+/vrdDBAAAAAAAbkTxAQAAmOratWsaPny4fvjhB4fnT506pVOnTunbb7/ViBEj9Oijj3onQAAAAAAA4HYUHwAAgKlefPFFo/BQvXp1PfDAAypdurR+//13LViwQPHx8Xr33XdVrlw5denSxbvBAgAAAAAAt6D4AAAATLN582Z99913kqRmzZpp1qxZKlCggCTp3nvv1f3336++ffsqJiZGb7zxhtq2basiRYp4M2QAAAAAAOAGTDgNAABM88knn0iSgoKC9PrrrxuFhzTVq1fXmDFjJEkxMTFatGiRx2MEAAAAAADuR/EBAACYIiYmRlu2bJEktWzZUpUqVcqwXadOnVSqVClJ0vfff++x+AAAAAAAgOdQfAAAAKbYvn27UlJSJKUOuZSZgIAANW7cWJL022+/6dKlSx6JDwAAAAAAeA7FBwAAYIpDhw4Zy+Hh4Vm2rVGjhiTJZrPpzz//dGtcAAAAAADA8yg+AAAAU5w6dcpYrlixYpZty5cvn+F6AAAAAADAP1B8AAAApoiOjjaWS5QokWXbsLAwYzkmJsZNEQEAAAAAAG8J8tSOkpOTHR7Hx8d7atd5WtrY2CkpKYqNjfVyNN5HfziiPxzRH47oD0f2/XH9d8z130Fwj2vXrhnLBQoUyLJtSEhIhuu5U37JRa7GW1WqiMWptqFBQbKl2GQJsKhQQefW8RWxsbEKCLDQH/+P/nCUk/6Q/LdP6A9HV+PjFKhEl9dLy0VsNpvD8+QhSJNfchFX8DeNI/rDEf3hiP5wRH+klxdzEYvt+mjc5Ny5czpx4oQndgUAgINKlSqpbNmy3g7D7w0cOFC//PKLJOnAgQOyWDI/EbV161YNGjRIkjR06FBFRka6PT5yEQCAN5CHIA25CADAG7yZizDsEgAAMEXBggWN5cTErK8YtVqtxrL9XRAAAAAAAMA/UHwAAACmCA0NNZYTEhKybGtffMhuiCYAAAAAAOB7PDbng/3EklLqiYbAwEBP7R4AkI8kJyc7nPy+/jsI7lGsWDFjOSYmRkWLFs20rf0k0yVLlnRnWAZyEQCAJ5CHIDPkIgAAT8hLuYjHig8hISGMcwkAgB+rWrWqsXz69GlVqlQp07ZnzpwxlitUqOD
OsAzkIgAAwJvIRQAA+Q3DLgEAAFNUr17dWD506FCWbdNet1gsqlmzplvjAgAAAAAAnkfxAQAAmKJhw4YKDg6WJP3yyy+ZtktOTta2bdskSREREQ7DNQEAAAAAAP9A8QEAAJiiWLFiatasmSRp/fr1+ueffzJst3z5ckVHR0uSOnbs6LH4AAAAAACA51B8AAAAphk0aJAkKTExUSNHjlRsbKzD64cPH9b48eMlSYULF1bPnj09HSIAAAAAAPAAi81ms3k7CAAA4D+GDRumVatWSZJuvPFG9enTRzfccIP++OMPffHFF4qLi5MkvfLKK+rdu7c3QwUAAAAAAG5C8QEAAJjq6tWrioyM1KZNmzJ83WKxKDIyUpGRkR6ODAAAAAAAeArFBwAAYDqbzaZvv/1WS5cu1f79+3XlyhWFhYXptttu08CBA3Xbbbd5O0QAAAAAAOBGFB8AAAAAAAAAAICpmHAaAAAAAAAAAACYiuIDAAAAAAAAAAAwFcUHAAAAAAAAAABgKooPAAAAAAAAAADAVBQfAAAAAAAAAACAqSg+AAAAAAAAAAAAU1F8AAAAAAAAAAAApqL4AAAAAAAAAAAATEXxAQAAAAAAAAAAmIriAwAAAAAAAAAAMBXFBwAAAAAAAAAAYCqKDwAAAAAAAAAAwFQUHwAAAAAAAAAAgKkoPgAAAAAAAAAAAFMFeWInR44c0RdffKFNmzbp7NmzkqRKlSqpdevWevDBB1WyZElPhOFVU6dO1bRp01xer1u3bnrzzTfdEFHeYLVa9dVXX2nlypU6ePCg4uPjVbx4cdWrV09du3ZV+/btZbFYvB2mR8XGxmrevHlas2aN/vrrL1mtVlWoUEEtW7bUwIEDVblyZW+H6FavvPKK5s+fr8jISA0dOjTb9hs3btQXX3yhPXv26PLlyypVqpTq16+vPn366I477vBAxJ7har+keeyxx/TDDz9owoQJ6t69uxsj9BxX+uLq1av6+uuvtWbNGv3555+6cuWKChcurJtuuklt27ZVnz59VLhwYQ9FDm/K77kIeUjmyEUc5fc8RCIXyQh5yL/IQ5AT+T0PkchFskIu4ohchFwkI+Qi//K1XMTtxYdPP/1UEydOVGJiosPzBw8e1MGDB7Vo0SLNmDFDt9xyi7tD8Un+/AVz9uxZPfbYY9q/f7/D8xcuXNCGDRu0YcMGtWrVSu+9955CQ0O9FKVn7dmzR0OGDNH58+cdnv/rr7/0119/acGCBXr66ac1aNAg7wToZlu3btWCBQucapuSkqKXX35ZixYtcnj+zJkzOnPmjFavXq3+/fvrpZde8vmfI1f6xd7ixYv1ww8/mB+QF7nSFwcOHFBkZKROnDjh8HxMTIx27typnTt36rPPPtOMGTNUp04dd4SLPIJcJOd8/fdndshFHOX3PEQiF8kIeci/yEOQE+QhuePLvz+dQS7iiFyEXCQj5CL/8sVcxK3Fh88++0wTJkyQJBUqVEj333+/6tWrp2vXrmn58uX65ZdfFBUVpUcffVTLly9XmTJl3BmOV3Xq1Ek333xztu0uXryocePGKSkpSYULF9bgwYM9EJ3nJSYmOnzBVqlSRT169NANN9ygY8eOacGCBYqOjtbGjRs1atQozZw508sRu9/Ro0c1aNAgxcXFSZKqVq2qnj17qkKFCjp27Ji++OILnT9/XhMmTFBycrLffTb27dunyMhIpaSkONX+/fffN75gb7jhBvXt21cVK1bU0aNHNX/+fEVHR2vevHkqVaqUhgwZ4s7Q3crVfkmzceNGvfzyy26Kyjtc6YuzZ89q8ODBunDhgiTplltuUadOnVS2bFmdP39eK1as0K5du3TmzBkNHjxYX3/9tSpWrOjuQ4AXkIukIg9Jj1zEUX7PQyRykYyQh/yLPAQ5QR7yL3KR9MhFHJGLkItkhFzkXz6bi9jc5MSJE7b69evbwsPDbc2aNbMdPHgwXZvXXnvNFh4ebgsPD7e9+uqr7grFZ6SkpNg
efvhho0+WL1/u7ZDcZuHChcZxPvbYY7arV686vB4VFWW77777jDY//fSTlyL1nF69ehnHO3ToUFtCQoLD6zExMbZu3brZwsPDbbVr187wZ8pX/fDDD7ZGjRoZxx8eHm6bMmVKpu0PHz5su/nmm23h4eG2zp0722JiYhxeP3v2rO2ee+6xhYeH2+rUqWM7efKkuw/BLVztlzRff/21rW7dug7rff311x6I2H1c7Ytnn33WaPfOO+9k2Oadd94x2kRGRrordHgRuYhr8lMeYrORi1wvP+chNhu5SEbIQ/5FHoKcIA9xHbkIuQi5CLmIPXKRf/lyLuK2CaenT5+ua9euSZLee+89hYeHp2vz7LPPGmMbrly50l2h+Iy0MSAl6b777lOnTp28HJH7rF69WpIUEBCgV199VQULFnR4vWTJknrxxRfTtfdXO3fu1K5duyRJN910k9555x2FhIQ4tClevLjef/99BQcHKykpSR988IE3QjWV1WrVlClT9Pjjj+vy5ctOrzdnzhwlJydLksaNG6fixYs7vF62bFm99dZbklKvJvnf//5nXtAekNN+uXLlil5++WU9//zzslqtbozQc3LSF1euXNHy5cslSXXq1NGoUaMybDdq1Cjj1sK1a9fq4sWL5gSNPINcxDX5KQ+RyEXs5dc8RCIXyQh5yL/IQ5Ab5CGuIxchF5HIRZxFLpIeuUiqvJaLuKX4YLVajV+Kbdq0UdOmTTNsFxISosjISA0cOFD9+/f3mw9GTpw7d04TJ06UJIWFhemFF17wckTudfLkSUmpX6Zly5bNsE2DBg2M5VOnTnkkLm/56aefjOWHH35YBQoUyLBd2qRkkrRu3TrFx8d7JD532LJlizp27Kjp06crJSVFoaGheuihh7JdLyUlRatWrZIkhYeHq1GjRhm2a9iwofFLdNWqVbLZbOYF70Y57ZfvvvtO7du318KFCyWl/mz17t3b3eG6VU77Yvv27caYuv/5z38yHdvSYrGoffv2klI/V3v37jUveHgduYhr8lseIpGL2MuPeYhELpIR8pB/kYcgN8hDXEcuQi6ShlyEXIRcJJW/5CJuKT5s3bpVsbGxkqRu3bpl2bZfv3568cUXNWTIkHRVzfxk4sSJxrh2Tz/9tEqUKOHliNyraNGikqSoqCjjuK9n/8WadjWIvzpy5IixfMcdd2TZtn79+pKka9euad++fW6Ny52WLVtmJFt169bVokWLjCQiK3/++adiYmIkSc2aNcuybdrr586d08GDB3MXsIfktF8WLlyoqKgoSVKLFi20ZMkSh0TVF+W0L5KSklSrVi0VL15cVatWzbKt/dUhrlxRgbyPXMQ1+S0PkchF7OXHPEQiF8kIeci/yEOQG+QhriMXIRdJQy5CLkIukspfchG3TDh94MABY9n+jY6OjtbRo0eVkJCgKlWq6MYbb3TH7n3OgQMHtGzZMkmpFcsePXp4OSL3q1+/vvbs2SObzaZPPvlEQ4cOTdfmo48+MpZbtGjhyfA87sqVK8ZyZlc8pAkLCzOWDx06pCZNmrgrLLcrWbKkIiMj1bt3bwUGBhpfFFn5888/jeWMbl22V6NGDWP5wIEDioiIyHmwHpSTfpGkihUratSoUercubObI/ScnPTFPffco3vuucep7R8+fNhYtv/Zgu8jF3FefsxDJHIRe/k1D5HIRTJCHvIv8hDkFHmIa8hFyEXIRchF7JGL/MsfchG3FB8OHTokKfUWwnLlyun48eN68803tXHjRiUlJRnt6tWrpxdeeEG33nqrO8LwGVOnTjVufRo+fLgCAtw2FUee8eCDD2rx4sWKj4/XjBkzdPnyZfXu3VsVKlTQ8ePH9fHHH+ubb76RJDVp0kT33nuvlyN2r9DQUGM5ISFBwcHBmbZNu4JGSq1c+6p+/fpp3Lhx6ca1zI79lR8VK1bMsu0NN9yQ4Xp5WU77ZeTIkapXr56Cgtzya90rctoXzrp27ZoxDmJgYKDq1q3rlv3
AO8hFnJcf8xCJXMRefsxDJHKRjJCH/Is8BLlBHuIachFyEXIRcpE05CL/8pdcxC2/0c+ePSsp9daNbdu2qUuXLlq3bp3Dl6wk7d27VwMGDDAOND86fvy41q9fLyl1Up22bdt6OSLPqFy5smbPnq1y5copJSVFc+fOVadOnXTLLbfovvvu0zfffKPg4GANGDBAs2bNUmBgoLdDdiv7K16yu23w999/N5Z9+fbsevXq5egXaHR0tLGc3a249rePpd2SmNfltF8aNmzoV1+yUs77wlnTp083Pk933nknVxz6GXIR5+TXPEQiF7GXH/MQiVwkI+Qh/yIPQW6QhziPXIRcRCIXcRW5SHrkIq7zVC7iluJD2lh1V69eVWRkpOLj43X//ffru+++0969e7V27Vr997//VUBAgJKSkjR69Gjt37/fHaHkefPmzVNKSook6aGHHsp0EhB/1KhRI02aNCnTD3exYsVUrVo1v/6CTWM/Rt+8efMybRcVFaV169YZj/PjhGTXrl0zlrMbE9V+kir79YCNGzcatzAHBgZq2LBhXo4IZiMXcU5+zkMkcpE05CGuIRdBbpGH+D/yEOeRi5CLSOQiriIXQW55Mhdxa/EhNjZWMTExGjZsmMaPH6+aNWsqJCRElSpV0tNPP62xY8dKSv1l8c4777gjlDwtLi5OixcvliSVLl1aXbt29W5AHpSYmKiRI0eqX79+iomJUbNmzfTyyy9r8uTJGjVqlGrUqKGoqCi9+uqrGjx4sN//grzzzjtVuXJlSdKaNWs0e/bsdG2uXr2qkSNHKj4+3nguvyVmkhyuFsruS9b+9euvMkL+tXv3bo0YMcL4IycyMlK1a9f2clQwG7lI9vJzHiKRi9gjD3ENuQhygzwkfyAPcQ65CLlIGnIR15CLIDc8nYu4fSC98PBwDRkyJMPXevfubUy+tHnzZp0+fdrd4eQpK1asMCbV6dmzZ7a/MPzJqFGjjFtLx4wZozlz5qhfv37q1KmTHn30US1btky9evWSJP3666964YUXvBmu2wUGBur11183rmaYOHGi+vfvr88//1wrV67UBx98oM6dO+vnn39Wq1atjPXsK9j5hf0tZ4mJiVm2tb8KIqsxI5F/7NixQ4MHDzYS1nvuuUePP/64l6OCu5GLZCw/5yESuYg98hDXkIsgp8hD8ifykMyRi5CLpCEXcQ25CHLKG7mIW4oPhQoVMpY7d+6cZSWyQ4cOxvLOnTvdEU6etXbtWmPZvh/83a+//qpVq1ZJkrp166b+/funaxMYGKixY8eqfv36kqTly5cbk3b5q6ZNm2rSpEnGl8i2bdv06quvasSIEZo8ebJOnTqlzp0767nnnjPWKVq0qLfC9Rr7iaiyu8UyISHBWHbnOHnwDRs3btQjjzxiTFDWvHlzTZo0Kd9MaJffkItkL7/mIRK5SEbIQ5xHLoKcIA/JX8hDnEMuQi5ij1zEeeQiyAlv5SJu2XqRIkWM5ZtuuinLttWqVTOW0yZlyg+uXr2qrVu3SpKqVq2qiIgIL0fkOatXrzaW+/btm2m7wMBAhy/gH374wZ1h5QkdOnTQihUr1LdvX1WqVEkhISEqXbq0WrVqpZkzZ2rSpEnGLbySVKZMGS9G6x3FihUzlrObLOnSpUvGcsmSJd0VEnzAokWLNGTIEKO637JlS82cOTPfXV2Vn5CLZC0/5yESuUhmyEOcQy4CV5GH5D/kIdkjFyEXyQi5iHPIReAqb+YibpkG/MYbb9S2bducamt/kGljTeUHmzdvNqqP7du393I0nvX3338by7Vq1cqyrf2YYydPnnRbTHlJxYoVjbE/M3Lw4EFj2T5RzS+qVq1qLGd3W7L96xUqVHBXSMjjZsyYoffff9943L59e02cOJE/+P0cuUjW8nMeIpGLZIU8JHvkInAFeUj+RB6SPXIRcpHMkItkj1wErvB2LuKWOx/Cw8ON5VOnTmXZ9sKFC8ZyuXLl3BFOnvTTTz8Zy3fffbc
XI/E8m81mLNvf/pUR+1t/0sb+y+9+/fVXSakTK918881ejsbzatSoYSxnd8up/ev2v5eQf7z77rsOX7K9evXSe++9xx/8+QC5SNbycx4ikYvkRn7PQyRyETiPPCT/Ig/JHrkIuUhOkYuQi8B5eSEXcUvxoXHjxsbyjz/+mGXb3bt3G8v56Ydg165dklIne8lvtxeWL1/eWN63b1+Wbf/8809j2Z8rtAcPHtSwYcPUu3dvbdq0KdN2165dM36m6tatqxIlSngqxDyjWrVqKl26tCTpl19+ybJt2uthYWH56vcLUs2aNUuzZs0yHj/++ON69dVXGVs5nyAXyVp+zkMkcpHrkYe4hlwEziAPyd/IQ7JHLkIuYo9cxDXkInBGXslF3LK3evXqqUqVKpKkrVu3as+ePRm2u3jxopYvXy5Jql69era3mvmLhIQEHT58WFJqcpHfrny5/fbbjeW5c+dm2s5ms2n+/PnG4xYtWrg1Lm8qXLiwVq1apV27dhk/ExmZP3++MZ7f/fff76Ho8paAgADjyph9+/Y5JOv2duzYod9//11S6i1l/KGXv/z888+aNGmS8XjEiBF66qmnvBgRPI1cJHP5PQ+RyEWuRx7iGnIRZIc8BOQhWSMXIRe5HrmIa8hFkJ28lIu47VM3ZMgQSam/KJ9++mn9888/Dq9brVY9++yzxsQngwYNclcoec7Ro0eVnJwsSfnyFrF77rlHFStWlCRt2LBBM2fOTNfGZrPpnXfeMW6na9GihV9fDXHjjTeqXr16kqRvv/02wy+OjRs3avLkyZKkSpUqqXv37p4MMU8ZMGCAgoJSp6x57rnndP78eYfXz549q9GjR0tKvS31wQcf9HiM8J6EhAS98MILxq3MAwYM0BNPPOHlqOAN5CIZy+95iEQucj3yENeRiyAz5CFIQx6SOXIRcpHrkYu4jlwEmclruYhbJpyWpK5du2rdunVavXq1/v77b913333q2bOnateuraioKC1cuFBHjx6VJDVp0kQ9e/Z0Vyh5zvHjx43lUqVKeTES7wgJCdFbb72lhx56SImJiXrvvfe0du1a3XvvvSpXrpzOnTunZcuWGdXZkiVL6rXXXvNy1O43atQoo0/69++vnj17qkGDBkpMTNSmTZu0evVqpaSkKDg4WG+//Xa+vDokTY0aNTRw4EB98skn+uuvv9SlSxf17dtX1apV07FjxzR//nxFRUVJkgYPHqzq1at7OWJ40pIlS4yxdQsXLqxbb71Va9euzXa9atWq8VnxM+QiGcvveYhELpIR8hDXkIsgM+QhSEMekjlyEXKRjJCLuIZcBJnJa7mI24oPkjRp0iS99NJLWrp0qa5cuaJPPvkkXZsWLVro/fffl8VicWcoecrZs2eN5SJFingxEu9p3LixPvzwQ40cOVIxMTHat29fhuMcVq1aVTNmzPDbcQ3t3X777Ro7dqxef/11JSYmav78+Q63V0pSiRIlNHnyZN16661eijLvePrpp3Xx4kUtWbJEUVFRmjp1aro2PXv25Bb3fGjJkiXGclxcnNOfgcjISA0dOtRdYcFLyEXSIw9JRS7iiDzEdeQiyAh5COyRh2SMXCQVuYgjchHXkYsgI3ktF3Fr8SE4OFhvvfWWunXrpkWLFmnHjh2KiopSWFiYatasqV69eumee+7Jd2OOxcXFGcvFihXzYiTe1bx5c61Zs0ZffPGFNmzYoKNHjyouLk7FihVTRESE2rVrpx49euSranafPn3UsGFDzZkzR7/88ovOnTun4OBgVatWTW3btlW/fv0UFhbm7TDzhMDAQL355pvq2LGjFixYoL179yomJkZFihRRgwYN1KdPH911113eDhNeYD8hG0Aukh55yL/IRRyRh7iGXAQZIQ+BPfKQjJGL/ItcxBG5iGvIRZCRvJaLWGxpA0ABAAAAAAAAAACYIH+V1wEAAAAAAAAAgNtRfAAAAAAAAAAAAKai+AAAAAAAAAAAAExF8QEAAAAAAAA
AAJiK4gMAAAAAAAAAADAVxQcAAAAAAAAAAGAqig8AAAAAAAAAAMBUFB8AAAAAAAAAAICpKD4AAAAAAAAAAABTUXwAAAAAAAAAAACmovgAAAAAAAAAAABMRfEBAAAAAAAAAACYiuIDAAAAAAAAAAAwFcUHAAAAAAAAAABgKooPAAAAAAAAAADAVBQfAAAAAAAAAACAqSg+AAAAAAAAAAAAU1F8AAAAAAAAAAAApqL4AAAAAAAAAAAATEXxAQAAAAAAAAAAmIriAwAAAAAAAAAAMBXFBwAAAAAAAAAAYCqKDwAAAAAAAAAAwFQUHwAAAAAAAAAAgKkoPgAAAAAAAAAAAFP9HxCLu63CfQCIAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, - "metadata": {}, + "metadata": { + "image/png": { + "height": 184, + "width": 783 + } + }, "output_type": "display_data" } ], @@ -585,17 +637,17 @@ " fig, (ax1, ax2, ax3) = plt.subplots(1,3)\n", "\n", " ax1.set_title(\"(a) strip plot\")\n", - " sns.stripplot(x=pricesW, ax=ax1, jitter=0)\n", - " ax1.set_xticks(range(7,13))\n", + " sns.stripplot(x=prices, ax=ax1, jitter=0)\n", + " ax1.set_xticks(range(6,13))\n", "\n", " ax2.set_title(\"(b) hist plot\")\n", - " sns.histplot(x=pricesW, ax=ax2)\n", - " ax2.set_xticks(range(7,13))\n", + " sns.histplot(x=prices, ax=ax2)\n", + " ax2.set_xticks(range(6,13))\n", " ax2.set_yticks(range(0,5))\n", "\n", " ax3.set_title(\"(c) box plot\")\n", - " sns.boxplot(x=pricesW, ax=ax3)\n", - " ax3.set_xticks(range(7,13))\n", + " sns.boxplot(x=prices, ax=ax3)\n", + " ax3.set_xticks(range(6,13))\n", "\n", "savefig(fig, \"figures/epricesW_strip_hist_box_plots.png\")" ] @@ -626,22 +678,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "a746d4e7-a90b-498f-ac4b-67521aadd149", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "len(pricesW)" + "len(prices)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "71a0a111-acc0-4140-ad01-49a3959e5ee9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "9.155555555555555" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "mean(pricesW)" + "mean(prices)" ] }, { @@ -662,10 +736,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "e4b22ff8-db8a-413b-bc55-61f2b64a0de9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " East West\n", + "0 7.7 11.8\n", + "1 5.9 10.0\n", + "2 7.0 11.0\n", + 
"3 4.8 8.6\n", + "4 6.3 8.3\n", + "5 6.3 9.4\n", + "6 5.5 8.0\n", + "7 5.4 6.8\n", + "8 6.5 8.5\n" + ] + } + ], "source": [ "DATA_URL = \"https://nobsstats.com/datasets/epriceswide.csv\"\n", "\n", @@ -676,7 +767,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "38c52689-1b5a-4053-8f76-c10a7b76cd72", "metadata": {}, "outputs": [], @@ -686,17 +777,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "927b21a6-7b75-4ac5-b93c-2b5632e0b608", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "type(pricesW)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "cd3b487a-545b-4680-b315-2e7d0471d63d", "metadata": {}, "outputs": [], @@ -723,30 +825,92 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "70075246-d2b0-48f0-9c26-b00b20c05bc7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pricesW.count()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "4bf01229-90ea-4352-a26e-7d4fa6c766c2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "9.155555555555557" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pricesW.mean()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, + "id": "aaff543c-6428-48b7-922a-3b3273324402", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.5621388471508475" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pricesW.std()" + ] + }, + { + "cell_type": 
"code", + "execution_count": 28, "id": "81a08643-8207-4130-93f8-8e9227f54c2e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 9.000000\n", + "mean 9.155556\n", + "std 1.562139\n", + "min 6.800000\n", + "25% 8.300000\n", + "50% 8.600000\n", + "75% 10.000000\n", + "max 11.800000\n", + "Name: West, dtype: float64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pricesW.describe()" ] @@ -761,1061 +925,135 @@ }, { "cell_type": "markdown", - "id": "66858e0a-d5d6-4d19-8023-d5cc3c9629b3", + "id": "345999ef-eab8-4c4d-b590-38cffc41460f", "metadata": {}, "source": [ - "### Understanding probability distributions" + "### Data cleaning" ] }, { - "cell_type": "markdown", - "id": "ebd06710-51b1-4486-84f3-3d5c24db0471", + "cell_type": "code", + "execution_count": 29, + "id": "70161723-bfab-4810-9cef-49cff61aa74d", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " East West\n", + "0 7.7 11.8\n", + "1 5.9 10.0\n", + "2 7.0 11.0\n", + "3 4.8 8.6\n", + "4 6.3 8.3\n", + "5 6.3 9.4\n", + "6 5.5 8.0\n", + "7 5.4 6.8\n", + "8 6.5 8.5\n" + ] + } + ], "source": [ - "A random variable ...\n", + "import pandas as pd\n", "\n", - "described by\n", - "\n" + "DATA_URL = \"https://nobsstats.com/datasets/epriceswide.csv\"\n", + "epriceswide = pd.read_csv(DATA_URL)\n", + "print(epriceswide)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "ec5cff0c-043f-46e2-b9dd-55b61562311f", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16767a22-5e52-4303-9ac6-00380ac765ea", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", - "id": "ca16dacc-b0bb-4219-8a9e-fb64976d41ed", + "id": "7a20430b-47bc-4eed-b536-d2c220f6bc3b", "metadata": {}, "source": [ - "#### Building computer models for probability distributions" + "Click 
[here](https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0A%0Aepriceswide_csv%20%3D%20'''%0AEast,West%0A7.7,11.8%0A5.9,10.0%0A7.0,11.0%0A4.8,8.6%0A6.3,8.3%0A6.3,9.4%0A5.5,8.0%0A5.4,6.8%0A6.5,8.5%0A'''%0A%0Aepriceswide%20%3D%20pd.read_csv%28io.StringIO%28epriceswide_csv%29%29%0A%0Aepriceswide.melt%28var_name%3D%22end%22,%20value_name%3D%22price%22%29&d=2023-07-02&lang=py&v=v1) to see a visualization of the above melt operation." ] }, { - "cell_type": "markdown", - "id": "8b2acf54-1940-466d-8289-cd2c223f124e", + "cell_type": "code", + "execution_count": 30, + "id": "f2172968-641c-4951-a722-029ac2bc853b", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " end price\n", + "0 East 7.7\n", + "1 East 5.9\n", + "2 East 7.0\n", + "3 East 4.8\n", + "4 East 6.3\n", + "5 East 6.3\n", + "6 East 5.5\n", + "7 East 5.4\n", + "8 East 6.5\n", + "9 West 11.8\n", + "10 West 10.0\n", + "11 West 11.0\n", + "12 West 8.6\n", + "13 West 8.3\n", + "14 West 9.4\n", + "15 West 8.0\n", + "16 West 6.8\n", + "17 West 8.5\n" + ] + } + ], "source": [ - "The standard normal distribution is denoted $Z \\sim \\mathcal{N}(\\mu=0,\\sigma=1)$,\n", - "where $Z$ is the name has the probability density function:\n", - "\n", - "$$\n", - " f_Z(z) = \\tfrac{1}{\\sqrt{2\\pi}} e^{ - \\frac{1}{2}z^2}.\n", - "$$\n", - "\n", - "The standard normal is a special case of the general normal $\\mathcal{N}(\\mu, \\sigma)$\n", - "where $\\mu$ is the mean and $\\sigma$ is the standard deviation.\n" + "eprices = pd.melt(epriceswide, var_name=\"end\", value_name='price')\n", + "print(eprices)" ] }, { - "cell_type": "markdown", - "id": "b15f8536-7e94-43fc-9c01-ac115e24a710", + "cell_type": "code", + "execution_count": 31, + "id": "37ed70f2-0a06-42bb-be5f-9bef646f44f2", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([11.8, 10. , 11. , 8.6, 8.3, 9.4, 8. , 6.8, 8.5]),\n", + " array([7.7, 5.9, 7. 
, 4.8, 6.3, 6.3, 5.5, 5.4, 6.5]))" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "To create a computer model for the random variable $Z$,\n", - "we can define the following Python function that performs the same calculation as the math function $f_Z$." + "pricesW = eprices[eprices[\"end\"]==\"West\"][\"price\"]\n", + "pricesE = eprices[eprices[\"end\"]==\"East\"][\"price\"]\n", + "\n", + "pricesW.values, pricesE.values" ] }, { "cell_type": "code", "execution_count": null, - "id": "eabedcd7-cb40-4806-9eab-f1e860a9b14c", + "id": "157b54eb-72ff-404a-a5c9-7b330fdba7dc", "metadata": {}, "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "def fZ(z):\n", - " const = 1 / np.sqrt(2*np.pi)\n", - " exp = np.exp(-1/2 * z**2)\n", - " return const*exp" - ] + "source": [] }, { "cell_type": "markdown", - "id": "29209544-25de-4cd2-a0e7-915219c4657c", + "id": "d23d66f9-3842-41c4-b6d4-48e039a9c42b", "metadata": {}, "source": [ - "Note the definition of the Python function `fZ` matches exactly the \n", - "calculations described in the complicated-looking math definition of $f_Z$ we saw above.\n", - "This is one of the key benefits of learning Python:\n", - "you can convert any math expressions into code expressions\n", - "then do computations with it." 
- ] - }, - { - "cell_type": "markdown", - "id": "61d16ed1-112e-4169-b1da-7a799cd14bb6", - "metadata": {}, - "source": [ - "We can now compute the value $f_Z(1)$ by calling the function `fZ` with input `1`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cea5af88-b561-4d81-916f-dc13cb9145aa", - "metadata": {}, - "outputs": [], - "source": [ - "fZ(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e06bc6b-03bb-4da3-bc6d-097bec212140", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "da9e61a8-0caa-4ffa-9182-a274fefffb0e", - "metadata": {}, - "source": [ - "#### Predefined computer models\n", - "\n", - "Instead of defining our own function to use for computations,\n", - "we can use one of the pre-defined probability model families in the SciPy library.\n", - "\n", - "To create a computer model for the standard normal random variable $Z \\sim \\mathcal{N}(\\mu=0, \\sigma=1)$,\n", - "we need to \"import\" the `norm` model family form `scipy.stats` then call `norm(0,1)`\n", - "to initialize the model with parameters $\\mu=0$ and $\\sigma=1$." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bbaf06c0-d415-46fd-9002-70896cd3b00c", - "metadata": {}, - "outputs": [], - "source": [ - "from scipy.stats import norm\n", - "rvZ = norm(0,1)\n", - "# rvZ" - ] - }, - { - "cell_type": "markdown", - "id": "983cef74-990e-4539-8b7f-429b84ec15be", - "metadata": {}, - "source": [ - "The probability density function $f_Z$ is available as the `.pdf` method on the model `rvZ`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f6a34bd0-37ca-4347-a82f-9cfbabcdf2c8", - "metadata": {}, - "outputs": [], - "source": [ - "rvZ.pdf(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0fab2bab-dfac-4ce4-960d-b614a8bf425c", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "8191f640-7916-456b-8946-1ddb19ae9991", - "metadata": {}, - "source": [ - "#### Probability model visualizations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1061670-cdb0-4de7-8136-d343c5cf17b6", - "metadata": {}, - "outputs": [], - "source": [ - "zs = np.linspace(-4, 4)\n", - "fZs = rvZ.pdf(zs)\n", - "sns.lineplot(x=zs, y=fZs)\n", - "\n", - "# FIGURES ONLY\n", - "ax = sns.lineplot(x=zs, y=fZs, color=\"b\")\n", - "ax.set_xlabel(\"$z$\")\n", - "ax.set_ylabel(\"$f_Z$\")\n", - "savefig(ax.figure, \"figures/pdf_of_rvZ.png\")" - ] - }, - { - "cell_type": "markdown", - "id": "e88417e2-1fef-4b14-b496-08595a587974", - "metadata": {}, - "source": [ - "The above graph tells you everything you need to know about the random variable $Z$.\n", - "The possible values of $Z$ are concentrated around the mean $\\mu=0$.\n", - "The region of highest density is roughly between $z=-1$ and $z=1$,\n", - "with most of values between $z=-2$ and $z=2$,\n", - "then the probability densities drops off to form long tails." 
- ] - }, - { - "cell_type": "markdown", - "id": "03cd7d2d-81ba-41f3-9b09-3dc01e3bc830", - "metadata": {}, - "source": [ - "The above graph shows the \"shape\" of the normal distribution $\\mathcal{N}(\\mu=0, \\sigma=1)$,\n", - "which is just one representative of the general normal distribution.\n", - "Here some examples of graphs of the normal distribution for choices of the parameters $\\mu$ and $\\sigma$\n", - "to give you an idea of what they do.\n", - "\n", - "![normal_panel.png](./attachments/normal_panel.png)" - ] - }, - { - "cell_type": "markdown", - "id": "f2bd2819-9ff6-45fa-b2ba-8ec0580659d3", - "metadata": {}, - "source": [ - "There are dozens of other probability distributions that can be useful for modelling \n", - "\n", - "You can take a look at the probability distirbution graphs here\n", - "\n", - "TODO links to other panels of pdfs\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab2bd740-8baf-4642-a4a6-4399d37c9db1", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "7a061b53-653c-4dab-a5f1-f0fcda47123e", - "metadata": {}, - "source": [ - "### Doing probability calculations" - ] - }, - { - "cell_type": "markdown", - "id": "4be97b22-714f-4364-bb59-865c5bbee078", - "metadata": {}, - "source": [ - "Calculating probabilities with the continuous random variable $Z$ requires using *integration*,\n", - "which the process of computing the total are under a curve for some region.\n", - "For example, \n", - "the probability that the random variable $Z$ will have a value somewhere\n", - "between $a$ and $b$ is defined as $\\textrm{Pr}(\\{a \\leq Z \\leq b\\}) = \\int_{z=a}^{z=b} f_Z(z) dz$.\n", - "\n", - "In words ..." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f9ec594b-dfa7-471f-b689-ac82c586d11b", - "metadata": {}, - "outputs": [], - "source": [ - "from scipy.integrate import quad\n", - "quad(rvZ.pdf, 1, 2)[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9763d430-2b9e-4358-998a-066ea1227f71", - "metadata": {}, - "outputs": [], - "source": [ - "# FIGURES ONLY\n", - "zs = np.linspace(-4, 4, 1000)\n", - "fZs = rvZ.pdf(zs)\n", - "ax = sns.lineplot(x=zs, y=fZs)\n", - "mask = (1 < zs) & (zs < 2)\n", - "ax.fill_between(zs[mask], y1=fZs[mask], alpha=0.6, facecolor=\"red\")\n", - "savefig(ax.figure, \"figures/pdf_of_rvZ_highlight_1_to_2.png\")" - ] - }, - { - "cell_type": "markdown", - "id": "4806383c-0f6e-4922-958f-f74ab94c1765", - "metadata": {}, - "source": [ - "In statistics,\n", - "we often have to compute the probability in one or both tails of the distribution,\n", - "which corresponds the probability of observing \"extreme values\"\n", - "\n", - "$\\textrm{Pr}(\\{Z \\geq 2\\}) = \\int_{z=2}^{z=\\infty} f_Z(z) dz$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b986752-36ec-4aed-ae45-6aa0a19eed1d", - "metadata": {}, - "outputs": [], - "source": [ - "from scipy.integrate import quad\n", - "quad(rvZ.pdf, 2, np.inf)[0]" - ] - }, - { - "cell_type": "markdown", - "id": "a939e55f-3f47-429b-9130-1e0e3c31e08a", - "metadata": {}, - "source": [ - "The cumulative distribution function (CDF) $F_Z$ is defined as the integral \n", - "of the probability density function $f_Z$ up to some value $z=b$.\n", - "\n", - "$$\n", - " F_Z(b) = \\textrm{Pr}(\\{Z \\leq b\\}) = \\int_{z=-\\infty}^{z=b} f_Z(z)\\; dz.\n", - "$$\n", - "\n", - "The computer model `rvZ` provides the method `.cdf` which allows us to obtain the values of $F_Z$ directly.\n", - "For example, $F_Z(-2) = \\textrm{Pr}(\\{Z \\leq -2\\})$ can be computed as follows." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "68239ec2-b5b0-480a-a013-0b6e9e1b4e62", - "metadata": {}, - "outputs": [], - "source": [ - "rvZ.cdf(-2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a043f55f-1cbe-4f0b-a38f-9c19adbb11e0", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "28cf8a39-4e3b-4f98-9f07-efb5dca10ce1", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "#### Discrete random variables (bonus topic)" - ] - }, - { - "cell_type": "markdown", - "id": "e7225000-23a5-478e-8a13-37ad2c8eb2f6", - "metadata": {}, - "source": [ - "There is a whole other type of random variables called \"discrete\" random variables,\n", - "which are defined only for integers, like $0$, $1$, $2$, etc.\n", - "\n", - "For example, the Poisson random variable $H$ is defined by the probability mass function,\n", - "\n", - "$$ \n", - " f_H(h) = \\frac{\\lambda^{h}e^{-\\lambda }}{h!},\n", - "$$\n", - "\n", - "for $h$ any natural number, $0, 1, 2, 3, \\ldots$.\n", - "The parameter $\\lambda$ (the Greek letter lambda) is used to control the shape of the distribution.\n", - "This math formula includes the lambda raised to the power $h$,\n", - "the exponential function $e^x$,\n", - "and the factorial function $n!$.\n", - "That's a lot of math!\n", - "If you need to do some probability calculations for the random variable $H$,\n", - "and you're ever forced to do the calculations using only pen and paper,\n", - "that would be quite the chore!" - ] - }, - { - "cell_type": "markdown", - "id": "cdbe61f9-d5b9-43c8-90b4-8a32db6330f4", - "metadata": {}, - "source": [ - "Wouldn't it be simpler (and more efficient) to define a Python function\n", - "that corresponds to the math function $f_H$,\n", - "then do all the calculations using Python as a calculator?" 
- ] - }, - { - "cell_type": "markdown", - "id": "7a447e70-187c-400a-af85-9bbec207f51e", - "metadata": {}, - "source": [ - "Let's see this in action!\n", - "We'll initialize a `poisson` model with the parameter $\\lambda=20$." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5ddee77a-7061-4392-a886-59e15e719a3e", - "metadata": {}, - "outputs": [], - "source": [ - "from scipy.stats import poisson\n", - "\n", - "rvH = poisson(20)\n", - "# rvH" - ] - }, - { - "cell_type": "markdown", - "id": "5f1f8d6c-ebf1-468c-bde7-2342495563e4", - "metadata": {}, - "source": [ - "Having defined the computer model `rvH`, we can use it to:\n", - "- generate visualizations\n", - "- compute probabilities\n", - "- run simulations\n", - "- use `rvH` it as part of multi-step probability calculations\n", - "- anything else you might want to do with random variable $H$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a561d5b3-4d81-48a0-9bf7-afdd77a5fb3f", - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "hs = np.arange(0,40)\n", - "fHs = rvH.pmf(hs)\n", - "plt.stem(fHs)" - ] - }, - { - "cell_type": "markdown", - "id": "3cb23373-0e13-408a-97c6-fa7405c0fb27", - "metadata": {}, - "source": [ - "Calculating the probability of $H$ being between $10$ and $20$\n", - "is done by summing over all the probabilities for that range of values of $h$.\n", - "\n", - "$$\n", - " \\textrm{Pr}(\\{10 \\leq H \\leq 20\\})\n", - " = \\sum_{h=10}^{h=20} f_H(h)\n", - " = f_H(10) + f_H(11) + f_H(12) + \\cdots + f_H(20).\n", - "$$\n", - "\n", - "This calculation can be done using a Python summation:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "76ba3d2f-a01f-4d9d-b371-c99a55171c87", - "metadata": {}, - "outputs": [], - "source": [ - "sum([rvH.pmf(h) for h in range(10,20+1)])" - ] - }, - { - "cell_type": "markdown", - "id": "873749d7-803e-4747-b94e-0414aa717fbb", - "metadata": 
{}, - "source": [ - "To see a complete worked example based on the \n", - "see [Example 3: hard disk failures](https://minireference.com/static/excerpts/noBSstats/noBSstats_ch02_PROB.pdf#page=15) and\n", - "[Section 2.1.5 Hard disks example](https://minireference.com/static/excerpts/noBSstats/noBSstats_ch02_PROB.pdf#page=29) in the PDF preview of Chapter 2." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "305d9abd-6c0a-403c-981e-5691d5d80043", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "fb5bc2bd-a3d2-4f8e-b9d4-aa67908b5245", - "metadata": {}, - "source": [ - "### Running statistical simulations" - ] - }, - { - "cell_type": "markdown", - "id": "31162a34-a280-434e-b7a7-6bda0a7fa7b2", - "metadata": {}, - "source": [ - "#### Sampling distributions\n", - "\n", - "The *sampling distribution* of the mean for samples\n", - "of size $n=20$ from the standard normal distribution $Z \\sim \\mathcal{N}(0,1)$\n", - "is denoted $\\overline{\\mathbf{Z}} = \\mathbf{Mean}(\\mathbf{Z})$,\n", - "where $\\mathbf{Z} = (Z_1, Z_2, \\ldots, Z_{20})$ is a *random sample*.\n", - "\n", - "The random variable $\\overline{\\mathbf{Z}}$ describes the kind of means we can expect to observe if\n", - "we compute the mean for a sample of size $n=20$ from the standard normal.\n", - "\n", - "Let's generate $N=10$ samples $\\mathbf{z}_1, \\mathbf{z}_2, \\mathbf{z}_3, \\ldots, \\mathbf{z}_{10}$ of size $n=20$\n", - "from $Z \\sim \\mathcal{N}(0,1)$, and compute the mean in each sample.\n", - "\n", - "![samples_from_rvZ_n20_w_means_n_stds.png](./attachments/samples_from_rvZ_n20_w_means_n_stds.png)\n", - "\n", - "The diamond markers indicate the position of the sample means computed from each sample:\n", - "$[\\overline{\\mathbf{z}}_1, \\overline{\\mathbf{z}}_2, \\overline{\\mathbf{z}}_3, \\ldots, \\overline{\\mathbf{z}}_{10}]$.\n", - "\n", - "Now imagine we generate 9990 more samples to obtain a total of $N=10000$ samples from the 
population model:\n", - "$\\mathbf{z}_1, \\mathbf{z}_2, \\mathbf{z}_3, \\ldots, \\mathbf{z}_{1000}$.\n", - "We can visualize the sampling distribution of the mean $\\overline{\\mathbf{Z}} = \\texttt{mean}(\\mathbf{Z})$\n", - "by plotting a histogram of the means computed from the $10000$ random samples,\n", - "`zbars` = $[\\overline{\\mathbf{z}}_1, \\overline{\\mathbf{z}}_2, \\overline{\\mathbf{z}}_3, \\ldots, \\overline{\\mathbf{z}}_{1000}]$,\n", - "where $\\overline{\\mathbf{z}}_j$ denotes the sample mean computed from the data in the $j$th sample,\n", - "$\\overline{\\mathbf{z}}_j = \\texttt{mean}(\\mathbf{z}_j)$." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f27f30f-dd5c-4bf1-a4e4-f73b406b812e", - "metadata": {}, - "outputs": [], - "source": [ - "zbars = []\n", - "for i in range(0, 10000):\n", - " sample = rvZ.rvs(20)\n", - " zbar = mean(sample)\n", - " zbars.append(zbar)\n", - "\n", - "# zbars[0:5]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "86ea33f6-2eac-4a20-9f41-7404548872c4", - "metadata": {}, - "outputs": [], - "source": [ - "ax = sns.histplot(zbars)\n", - "\n", - "savefig(plt.gcf(), \"figures/hist_sampling_dist_mean_rvZ_n20.png\")" - ] - }, - { - "cell_type": "markdown", - "id": "98f5867a-2c76-4088-8f8b-a9cabe188a04", - "metadata": {}, - "source": [ - "The above figure shows the sampling distribution of the mean for samples of size $n=20$ from the standard normal.\n", - "The histogram shows the \"density of diamond shapes,\"\n", - "and provides a representation of the sampling distribution of the mean $\\overline{\\mathbf{Z}} = \\Mean(\\mathbf{Z})$." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "53ca2c67-5dfd-4f11-b09d-79df63f415a3", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "285187ea-e9ef-4a20-bb1f-05a41aa0a259", - "metadata": {}, - "source": [ - "#### Verifying p-values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3cbe13fa-2b49-44d9-b1e1-5a13d7f08b71", - "metadata": {}, - "outputs": [], - "source": [ - "from scipy.stats import norm, ttest_1samp\n", - "\n", - "muK = 1000\n", - "sigmaK = 10\n", - "rvK = norm(muK, sigmaK)\n", - "\n", - "count = 0\n", - "for j in range(0, 10000):\n", - " sample = rvK.rvs(20)\n", - " res = ttest_1samp(sample, popmean=muK)\n", - " if res.pvalue < 0.05:\n", - " count = count + 1\n", - "\n", - "count / 10000" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "28424dd6-5f88-4430-b687-b4b9c62ae0c7", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "025f07b0-2443-4e6a-9779-6173bf37aa20", - "metadata": {}, - "source": [ - "#### Verifying confidence intervals" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6bfd78ff-0981-48c4-9b9c-4a51b61b57b6", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "np.random.seed(10)\n", - "\n", - "muK = 1000\n", - "sigmaK = 10\n", - "rvK = norm(muK, sigmaK)\n", - "\n", - "count = 0\n", - "for j in range(0, 10000):\n", - " sample = rvK.rvs(20)\n", - " res = ttest_1samp(sample, popmean=1000)\n", - " ci = res.confidence_interval(confidence_level=0.90)\n", - " if ci.low <= muK <= ci.high:\n", - " count = count + 1\n", - "\n", - "count / 10000" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4230b1f6-e1c3-4958-86e4-0b610cd248f8", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "1de45e91-cf6d-4c20-8d06-100488b548ad", - "metadata": {}, - "source": [ - "### Resampling 
methods\n", - "\n", - "Clever techniques that reuse data from observed sample to simulate the variability in the population.\n" - ] - }, - { - "cell_type": "markdown", - "id": "3029a7e9-2f11-4b1f-a179-947df756fd2d", - "metadata": {}, - "source": [ - "#### Bootstrap estimation" - ] - }, - { - "cell_type": "markdown", - "id": "f879348d-1bf5-434f-af5b-84ff45a20294", - "metadata": {}, - "source": [ - "Generate 5000 bootstrap samples (sampling with replacement) from the sample `pricesW`.\n", - "Use the bootstrap samples to approximate the sampling distribution of the mean." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7b71806-22c0-4ae0-9433-c1385e589be8", - "metadata": {}, - "outputs": [], - "source": [ - "n = len(pricesW)\n", - "xbars_boot = []\n", - "for i in range(0, 5000):\n", - " bsample = np.random.choice(pricesW, n, replace=True)\n", - " xbar_boot = mean(bsample)\n", - " xbars_boot.append(xbar_boot)\n", - "\n", - "sns.histplot(xbars_boot)\n", - "\n", - "savefig(plt.gcf(), \"figures/bootstrap_dist_mean_epricesW.png\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c5b254d-d587-488f-8037-89d17e45f7d2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "8b0a8e9e-3e3b-4ed8-9927-a546792932c3", - "metadata": {}, - "source": [ - "#### Permutation test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "37420af1-c6c6-4594-9b0c-39ac34057189", - "metadata": {}, - "outputs": [], - "source": [ - "DATA_URL = \"https://nobsstats.com/datasets/epriceswide.csv\"\n", - "import pandas as pd\n", - "epriceswide = pd.read_csv(DATA_URL)\n", - "pricesW = epriceswide[\"West\"]\n", - "pricesE = epriceswide[\"East\"]" - ] - }, - { - "cell_type": "markdown", - "id": "3474a99e-c385-449b-85af-fb74ca97b297", - "metadata": {}, - "source": [ - "We'll compare the prices in the two parts of the city in terms\n", - "of the difference between the average price in each sample." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c8204bcd-6126-4e57-9198-ae3a62b45912", - "metadata": {}, - "outputs": [], - "source": [ - "def dmeans(xsample, ysample):\n", - " dhat = mean(xsample) - mean(ysample)\n", - " return dhat\n", - "\n", - "# Calculate the observed difference between means\n", - "dprice = dmeans(pricesW, pricesE)\n", - "dprice" - ] - }, - { - "cell_type": "markdown", - "id": "603905d8-59ae-457c-8535-852a6b54665f", - "metadata": {}, - "source": [ - "Obtain sampling distribution of the difference between means under the null hypothesis." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cb0fd666-98ba-4a13-aa05-782b095886d8", - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(42)\n", - "\n", - "pdhats = []\n", - "for i in range(0, 10000):\n", - " allprices = np.concatenate((pricesW, pricesE))\n", - " pallprices = np.random.permutation(allprices)\n", - " psampleW = pallprices[0:len(pricesW)]\n", - " psampleE = pallprices[len(pricesW):]\n", - " pdhat = dmeans(psampleW, psampleE)\n", - " pdhats.append(pdhat)" - ] - }, - { - "cell_type": "markdown", - "id": "f509ba5b-62e2-4710-90e7-29395685ff4b", - "metadata": {}, - "source": [ - "Compute the p-value of the observed difference between means `dprice` under the null hypothesis." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8cd155d6-635b-4b87-ba69-d731e27722f5", - "metadata": {}, - "outputs": [], - "source": [ - "tails = [d for d in pdhats if abs(d) > dprice]\n", - "pvalue = len(tails) / len(pdhats)\n", - "pvalue" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "77788fef-cb44-4123-b71a-553f7206b9c5", - "metadata": {}, - "outputs": [], - "source": [ - "# plot the sampling distribution in blue\n", - "ax = sns.histplot(pdhats, bins=100)\n", - "\n", - "# plot red line for the observed statistic\n", - "plt.axvline(dprice, color=\"red\")\n", - "\n", - "# plot the values that are equal or more extreme in red\n", - "sns.histplot(tails, ax=ax, bins=100, color=\"red\")\n", - "_ = ax.set_ylabel(\"$f_{\\widehat{D}_0}$\")\n", - "\n", - "savefig(plt.gcf(), \"figures/pvalue_viz_permutation_test_eprices.png\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f812d89a-d782-4879-a7f0-51b587ca2a51", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "345999ef-eab8-4c4d-b590-38cffc41460f", - "metadata": {}, - "source": [ - "### Data cleaning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "70161723-bfab-4810-9cef-49cff61aa74d", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "DATA_URL = \"https://nobsstats.com/datasets/epriceswide.csv\"\n", - "epriceswide = pd.read_csv(DATA_URL)\n", - "print(epriceswide)" - ] - }, - { - "cell_type": "markdown", - "id": "7a20430b-47bc-4eed-b536-d2c220f6bc3b", - "metadata": {}, - "source": [ - "Click 
[here](https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0A%0Aepriceswide_csv%20%3D%20'''%0AEast,West%0A7.7,11.8%0A5.9,10.0%0A7.0,11.0%0A4.8,8.6%0A6.3,8.3%0A6.3,9.4%0A5.5,8.0%0A5.4,6.8%0A6.5,8.5%0A'''%0A%0Aepriceswide%20%3D%20pd.read_csv%28io.StringIO%28epriceswide_csv%29%29%0A%0Aepriceswide.melt%28var_name%3D%22end%22,%20value_name%3D%22price%22%29&d=2023-07-02&lang=py&v=v1) to see a visualization of the above melt operation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2172968-641c-4951-a722-029ac2bc853b", - "metadata": {}, - "outputs": [], - "source": [ - "eprices = pd.melt(epriceswide, var_name=\"end\", value_name='price')\n", - "print(eprices)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "37ed70f2-0a06-42bb-be5f-9bef646f44f2", - "metadata": {}, - "outputs": [], - "source": [ - "pricesW = eprices[eprices[\"end\"]==\"West\"][\"price\"]\n", - "pricesE = eprices[eprices[\"end\"]==\"East\"][\"price\"]\n", - "\n", - "pricesW.values, pricesE.values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "157b54eb-72ff-404a-a5c9-7b330fdba7dc", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "f84ed421-3c6e-40cf-bd87-53d286620a07", - "metadata": {}, - "source": [ - "### Statistics procedures as code" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a362074c-72c6-477c-8245-2af319c0a390", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "3819cfff-67ec-4a1d-ba2a-9070c49d24d4", - "metadata": {}, - "source": [ - "#### Generating sampling distributions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c717d785-0490-4807-97a3-4c8c94666e95", - "metadata": {}, - "outputs": [], - "source": [ - "def gen_sampling_dist(rv, estfunc, n, N=10000):\n", - " \"\"\"\n", - " Simulate `N` samples of size `n` from the random variable `rv` to\n", - " generate 
the sampling distribution of the estimator `estfunc`.\n", - " \"\"\"\n", - " estimates = []\n", - " for i in range(0, N):\n", - " sample = rv.rvs(n)\n", - " estimate = estfunc(sample)\n", - " estimates.append(estimate)\n", - " return estimates\n", - "\n", - "zbars = gen_sampling_dist(rvZ, estfunc=mean, n=20)\n", - "sns.histplot(zbars)" - ] - }, - { - "cell_type": "markdown", - "id": "013420b6-a902-44a9-b0d8-c46d53209a5a", - "metadata": {}, - "source": [ - "#### Generating bootstrap approximations to sampling distributions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ddb08ce-799d-4388-9371-95dcd47866e3", - "metadata": {}, - "outputs": [], - "source": [ - "def gen_boot_dist(sample, estfunc, B=5000):\n", - " \"\"\"\n", - " Generate estimates from the sampling distribution of the estimator `estfunc`\n", - " based on `B` bootstrap samples (sampling with replacement) from `sample`.\n", - " \"\"\"\n", - " n = len(sample)\n", - " bestimates = []\n", - " for i in range(0, B):\n", - " bsample = np.random.choice(sample, n, replace=True)\n", - " bestimate = estfunc(bsample)\n", - " bestimates.append(bestimate)\n", - " return bestimates\n", - "\n", - "\n", - "zbars_boot = gen_boot_dist(pricesW, estfunc=mean)\n", - "sns.histplot(zbars_boot)" - ] - }, - { - "cell_type": "markdown", - "id": "0cac9088-21b9-4642-b9e1-9ca61571e3ff", - "metadata": {}, - "source": [ - "#### The permutation test for comparing two groups" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a3359c3d-32b2-44bc-863a-c35c2a8e5e24", - "metadata": {}, - "outputs": [], - "source": [ - "def permutation_test_dmeans(xsample, ysample, P=10000):\n", - " \"\"\"\n", - " Compute the p-value of the observed difference between means\n", - " `dmeans(xsample,ysample)` under the null hypothesis where\n", - " the group membership is randomized.\n", - " \"\"\"\n", - " # 1. 
Compute the observed difference between means\n", - " obsdhat = dmeans(xsample, ysample)\n", - "\n", - " # 2. Get sampling dist. of `dmeans` under H0\n", - " pdhats = []\n", - " allprices = np.concatenate((pricesW, pricesE))\n", - " for i in range(0, P):\n", - " pallprices = np.random.permutation(allprices)\n", - " psampleW = pallprices[0:len(pricesW)]\n", - " psampleE = pallprices[len(pricesW):]\n", - " pdhat = dmeans(psampleW, psampleE)\n", - " pdhats.append(pdhat)\n", - "\n", - " # 3. Compute the p-value\n", - " tails = [d for d in pdhats if abs(d) > obsdhat]\n", - " pvalue = len(tails) / len(pdhats)\n", - " return pvalue\n", - "\n", - "np.random.seed(42)\n", - "permutation_test_dmeans(pricesW, pricesE)" - ] - }, - { - "cell_type": "markdown", - "id": "5889118e-4d74-425c-8e84-c4eef002dadf", - "metadata": {}, - "source": [ - "\n", - "See the file [stats_helpers.py](https://github.com/minireference/noBSstatsnotebooks/blob/main/notebooks/stats_helpers.py)\n", - "for more examples of Python functions that \n", - "for definitions all the important statistical analysis procedures in STATS 101.\n", - "\n", - "In the past, students first contact with statistics was presented as a bunch of procedures\n", - "without explanation, and students were supposed to memorize when to use which \"recipe\".\n", - "Statistics instructors always had to \"skip the details\" because it's super complicated to\n", - "explain all the details (probability models, sampling distributions, p-value calculations, etc.).\n", - "\n", - "Now that we have Python on our side, we don't have to water-down the material,\n", - "but can instead show all the detailed calculations for statistical tests,\n", - "as easy-to-understand Python source code, which makes it much much easier to understand what is going on.\n", - "Currently,\n", - "the file [stats_helpers.py](https://github.com/minireference/noBSstatsnotebooks/blob/main/notebooks/stats_helpers.py)\n", - "is 400 lines of code.\n", - "With a little bit 
of Python knowledge,\n", - "you can read this file and understand all of statistics." - ] - }, - { - "cell_type": "markdown", - "id": "d23d66f9-3842-41c4-b6d4-48e039a9c42b", - "metadata": {}, - "source": [ - "## How much Python do you need to know?\n", - "\n", - "I remind you the key aspect is to learn how to use Python as a calculator.\n", - "\n", - "I talked about the `for`-loops and function definitions only to make sure you can **read Python code**,\n", - "but you don't need to write any such code to learn statistics.\n", - "As long as you know how to call functions and run code cells in a notebook,\n", - "then you'll still benefit from all the educational power that Python has to offer.\n", - "\n" + "## How much Python do you need to know?\n", + "\n", + "I remind you the key aspect is to learn how to use Python as a calculator.\n", + "\n", + "I talked about the `for`-loops and function definitions only to make sure you can **read Python code**,\n", + "but you don't need to write any such code to learn statistics.\n", + "As long as you know how to call functions and run code cells in a notebook,\n", + "then you'll still benefit from all the educational power that Python has to offer.\n", + "\n" ] }, { @@ -1866,8 +1104,8 @@ " - [Outline of the stats curriculum research](https://minireference.com/blog/fixing-the-introductory-statistics-curriculum/)\n", " - [Book proposal](https://minireference.com/blog/no-bullshit-guide-to-statistics-progress-update/)\n", " - [Stats survey results](https://minireference.com/blog/what-stats-do-people-want-to-learn/)\n", - "- [There's Only One Test](https://www.youtube.com/watch?v=S41zQEshs5k) talk by Allen B. 
Downey\n", - "- [Statistics for Hackers](https://www.youtube.com/watch?v=Iq9DzN6mvYA) talk by Jake Vanderplas\n" + " - Part 2\n", + " - Part 3" ] }, { @@ -1909,305 +1147,6 @@ "source": [ "_____" ] - }, - { - "cell_type": "markdown", - "id": "345abe39-4b08-4ec3-8f8f-e0744cccf067", - "metadata": { - "tags": [] - }, - "source": [ - "# CUT MATERIAL" - ] - }, - { - "cell_type": "markdown", - "id": "c55a4b4b-af80-455c-94ea-5c4fdc526db1", - "metadata": {}, - "source": [ - "#### Pandas equivalent" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ad028fd-5c2d-4423-a113-42f883ab3538", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "gseries = pd.Series(grades)\n", - "gseries.mean()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3304087a-cf2c-4243-9c24-a85f43c69795", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "7ce05d09-db37-41c1-b083-0df4f34a16f9", - "metadata": {}, - "source": [ - "$N \\sim \\mathcal{N}(\\mu,\\sigma)$ has the probability density function:\n", - "\n", - "$$\n", - " f_N(x) = \\tfrac{1}{\\sigma\\sqrt{2\\pi}} e^{ -\\frac{1}{2} \\left( \\frac{x-\\mu}{\\sigma} \\right)^2 },\n", - "$$\n", - "\n", - "where $\\mu$ is the mean and $\\sigma$ is the standard deviation.\n", - "We use the notation $\\mathcal{N}(\\mu, \\sigma)$ to describe the distribution as math,\n", - "and `norm(mu,sigma)` to describe as computer model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2eb43e7b-273d-4f31-81ac-e2927aa6fbdc", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cbc42451-cf62-417f-a9ed-0d3c608ca935", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "def fN(x, mu=0, sigma=1):\n", - " const = 1 / (sigma*np.sqrt(2*np.pi))\n", - " exp = np.exp( -1/2 * ( (x-mu)/sigma )**2 )\n", - " return const * exp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e526f8ba-33f9-4d36-a6cf-49b655d47d03", - "metadata": {}, - "outputs": [], - "source": [ - "fN(3, 2, 3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12fa2efa-a483-421b-ad6b-376c79f33090", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e55c4dd3-fbb0-4044-91cf-b6ee104ba292", - "metadata": {}, - "outputs": [], - "source": [ - "def mean(sample):\n", - " total = 0\n", - " for xi in sample:\n", - " total = total + xi\n", - " avg = total / len(sample)\n", - " return avg" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ba8aaac6-3208-45bf-865c-9d73553caa24", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5900b927-4053-4a93-b12a-1d6801ccc91a", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25a881c3-8725-466b-9d71-cc646a3febe4", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "e42d0803-84f2-4efc-b4d0-aded6496f610", - "metadata": {}, - "source": [ - "### Problem NN (numerical math considerations)\n", - "\n", - "We'll use the Python library NumPy (module `numpy` imported as `np`) \n", - "to help us with the fancy math operations.\n", - "To compute $e^x$ we can call `np.exp(x)`,\n", - "and to compute the 
factorial of `n` we can call `np.math.factorial(n)`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fda48f99-b838-4784-89aa-7ef40742938e", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "def fH(h):\n", - " lam = 20\n", - " return lam**h * np.exp(-lam) / np.math.factorial(h)\n", - "\n", - "# calculation is not stable for h > 14\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "hs = np.arange(0,40)\n", - "fHs = [fH(h) for h in hs]\n", - "plt.stem(fHs)" - ] - }, - { - "cell_type": "markdown", - "id": "6733bd48-7efd-4576-85d9-1fb06d106b04", - "metadata": {}, - "source": [ - "We can apply the log-trick to the formula for ..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "21d0b54d-e5e9-446a-83e9-517c4102c8e7", - "metadata": {}, - "outputs": [], - "source": [ - "from scipy.special import gammaln\n", - "\n", - "def fHalt(h):\n", - " lam = 20\n", - " return np.exp(h * np.log(lam) - lam - gammaln(h + 1))\n", - "\n", - "fHalts = [fHalt(h) for h in hs]\n", - "plt.stem(fHalts)" - ] - }, - { - "cell_type": "markdown", - "id": "e690dd10-13ba-4935-8004-5a15a58c8c4f", - "metadata": {}, - "source": [ - "The log-transform trick and `gammaln` function are really useful for dealing with large factorials and multiplications of small probabilities,\n", - "which occur a lot in statistical calculations.\n", - "The need for numerical stability is one thing you need to keep in mind when\n", - "you implement statistical algorithms in production." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b21aac3c-0921-4e58-8e77-b1a9980928ec", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfa36e08-6a82-4939-a5e9-62cfc4dcd3a8", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "01a6e381-d092-4ea4-906e-fe2b161bff55", - "metadata": {}, - "source": [ - "The cumulative distribution function (CDF) $F_Z$ is defined as the integral \n", - "of the probability density function $f_Z$ up to some value $z=b$.\n", - "\n", - "$$\n", - " \\textrm{Pr}(\\{Z \\leq b\\}) = F_Z(b) = \\int_{z=-\\infty}^{z=b} f_Z(z)\\; dz.\n", - "$$\n", - "\n", - "The computer model `rvZ` provides the method `.cdf` which allows us to obtain the values of $F_Z$ directly." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36b0686c-fe69-49cb-98ec-22baa56de926", - "metadata": {}, - "outputs": [], - "source": [ - "rvZ.cdf(2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5cec486c-9b1e-4748-939e-23ffecd257ac", - "metadata": {}, - "outputs": [], - "source": [ - "# FIGURES ONLY\n", - "zs = np.linspace(-4, 4, 1000)\n", - "fZs = rvZ.pdf(zs)\n", - "ax = sns.lineplot(x=zs, y=fZs)\n", - "mask = (zs < 2)\n", - "ax.fill_between(zs[mask], y1=fZs[mask], alpha=0.6, facecolor=\"red\")\n", - "savefig(ax.figure, \"figures/pdf_of_rvZ_highlight_-infty_to_2.png\")" - ] - }, - { - "cell_type": "markdown", - "id": "b1340dda-d1cb-4189-b1ac-fe1546a2774d", - "metadata": {}, - "source": [ - "We're often interested in computing the complement,\n", - "\n", - "$$\n", - " \\textrm{Pr}(\\{Z \\geq b\\}) = 1- F_Z(b) = \\int_{z=b}^{z=\\infty} f_Z(z) \\; dz.\n", - "$$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e3750693-6415-4b09-9642-df1f7f8e0f29", - "metadata": {}, - "outputs": [], - "source": [ - "1 - rvZ.cdf(2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"id": "4fa95c21-9874-4021-a629-79baea87433a", - "metadata": {}, - "outputs": [], - "source": [ - "# FIGURES ONLY\n", - "zs = np.linspace(-4, 4, 1000)\n", - "fZs = rvZ.pdf(zs)\n", - "ax = sns.lineplot(x=zs, y=fZs)\n", - "mask = (zs > 2)\n", - "ax.fill_between(zs[mask], y1=fZs[mask], alpha=0.6, facecolor=\"red\")\n", - "savefig(ax.figure, \"figures/pdf_of_rvZ_highlight_2_to_infty.png\")" - ] - } ], "metadata": { @@ -2226,7 +1165,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.15" + "version": "3.9.4" } }, "nbformat": 4, diff --git a/blogposts/python_for_stats2.ipynb b/blogposts/python_for_stats2.ipynb new file mode 100644 index 00000000..3aa700c7 --- /dev/null +++ b/blogposts/python_for_stats2.ipynb @@ -0,0 +1,1335 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ae0788bf-c856-4118-adfe-8028b4f14fff", + "metadata": {}, + "source": [ + "# Using Python for learning statistics Part 2" + ] + }, + { + "cell_type": "markdown", + "id": "ff7f1294-525c-4adc-a95a-ce8465d568dd", + "metadata": {}, + "source": [ + "This Jupyter notebook contains the code examples from the blog post [Python coding skills for statistics Part 2](https://docs.google.com/document/d/1XusbfJoZ7CQxbeWPPXMM84BA-VeUn8lkPUuUy7RcW8M/edit).\n", + "\n", + "I've intentionally left empty code cells throughout the notebook,\n", + "which you can use to try some Python commands on your own.\n", + "For example,\n", + "you can copy-paste some of the commands in previous cells,\n", + "modify them and run to see what happens.\n", + "Try to break things.\n", + "\n", + "**To run a code cell, press** the play button in the menu bar, or use the keyboard shortcut **SHIFT+ENTER**."
+ ] + }, + { + "cell_type": "markdown", + "id": "7afd4148-7f0f-4cdc-a11f-7bf8be06c52c", + "metadata": { + "tags": [] + }, + "source": [ + "### Notebook setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0042b56d-8c03-4fe5-8119-6cb1043080d0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Figures setup\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "plt.clf() # needed otherwise `sns.set_theme` doesn't work\n", + "sns.set_theme(\n", + " style=\"whitegrid\",\n", + " rc={'figure.figsize': (6.25, 2.0)},\n", + ")\n", + "# High-resolution figures please\n", + "%config InlineBackend.figure_format = 'retina'\n", + "\n", + "def savefig(fig, filename):\n", + " fig.tight_layout()\n", + " fig.savefig(filename, dpi=300, bbox_inches=\"tight\", pad_inches=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2a17d1d-a564-4939-89c4-49283d08a675", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "66910667-e79e-4c51-8c31-6b1d4ca1ee24", + "metadata": {}, + "source": [ + "### Understanding probability distributions" + ] + }, + { + "cell_type": "markdown", + "id": "ea2106b1-9747-43fb-bda2-e9bc058156b3", + "metadata": {}, + "source": [ + "A random variable ...\n", + "\n", + "described by\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5cff0c-043f-46e2-b9dd-55b61562311f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16767a22-5e52-4303-9ac6-00380ac765ea", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "fc0eb9a6-d8fa-44f3-8de8-5f3f6601711f", + "metadata": {}, + "source": [ + "#### Building computer models for probability distributions" + ] + }, + { + "cell_type": "markdown", + "id": "8b85e7a0-dfd3-42ab-a4ba-1e6605018b6c", + "metadata": {}, + "source": [ + "The standard normal distribution is denoted $Z \\sim \\mathcal{N}(\\mu=0,\\sigma=1)$,\n", + "where $Z$ is the name of the random variable, which has the probability density function:\n", + "\n", + "$$\n", + " f_Z(z) = \\tfrac{1}{\\sqrt{2\\pi}} e^{ - \\frac{1}{2}z^2}.\n", + "$$\n", + "\n", + "The standard 
normal is a special case of the general normal $\\mathcal{N}(\\mu, \\sigma)$\n", + "where $\\mu$ is the mean and $\\sigma$ is the standard deviation.\n" + ] + }, + { + "cell_type": "markdown", + "id": "1a3454db-6832-46d3-af83-a8615f1154de", + "metadata": {}, + "source": [ + "To create a computer model for the random variable $Z$,\n", + "we can define the following Python function that performs the same calculation as the math function $f_Z$." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "eabedcd7-cb40-4806-9eab-f1e860a9b14c", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def fZ(z):\n", + " const = 1 / np.sqrt(2*np.pi)\n", + " exp = np.exp(-1/2 * z**2)\n", + " return const*exp" + ] + }, + { + "cell_type": "markdown", + "id": "aafee685-fdcc-4a54-b0b5-17d2e79fedcd", + "metadata": {}, + "source": [ + "Note the definition of the Python function `fZ` matches exactly the \n", + "calculations described in the complicated-looking math definition of $f_Z$ we saw above.\n", + "This is one of the key benefits of learning Python:\n", + "you can convert any math expressions into code expressions\n", + "then do computations with it." 
+ ] + }, + { + "cell_type": "markdown", + "id": "fc99acc6-9ff2-4543-9a6d-e2ee3d19edb5", + "metadata": {}, + "source": [ + "We can now compute the value $f_Z(1)$ by calling the function `fZ` with input `1`:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cea5af88-b561-4d81-916f-dc13cb9145aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.24197072451914337" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fZ(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e06bc6b-03bb-4da3-bc6d-097bec212140", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "91d4e038-e6a5-4d09-9a15-d7fea12d56d6", + "metadata": {}, + "source": [ + "#### Predefined computer models\n", + "\n", + "Instead of defining our own function to use for computations,\n", + "we can use one of the pre-defined probability model families in the SciPy library.\n", + "\n", + "To create a computer model for the standard normal random variable $Z \\sim \\mathcal{N}(\\mu=0, \\sigma=1)$,\n", + "we need to \"import\" the `norm` model family from `scipy.stats`, then call `norm(0,1)`\n", + "to initialize the model with parameters $\\mu=0$ and $\\sigma=1$." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bbaf06c0-d415-46fd-9002-70896cd3b00c", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import norm\n", + "rvZ = norm(0,1)\n", + "# rvZ" + ] + }, + { + "cell_type": "markdown", + "id": "4e5ecbdb-69ab-4d01-b81b-edeaa41c5923", + "metadata": {}, + "source": [ + "The probability density function $f_Z$ is available as the `.pdf` method on the model `rvZ`."
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f6a34bd0-37ca-4347-a82f-9cfbabcdf2c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.24197072451914337" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rvZ.pdf(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0fab2bab-dfac-4ce4-960d-b614a8bf425c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "d5db9eb1-9f05-4828-8857-00d0c19e68b8", + "metadata": {}, + "source": [ + "#### Probability model visualizations" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b1061670-cdb0-4de7-8136-d343c5cf17b6", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "image/png": { + "height": 186, + "width": 605 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "zs = np.linspace(-4, 4)\n", + "fZs = rvZ.pdf(zs)\n", + "sns.lineplot(x=zs, y=fZs)\n", + "\n", + "# FIGURES ONLY\n", + "ax = sns.lineplot(x=zs, y=fZs, color=\"b\")\n", + "ax.set_xlabel(\"$z$\")\n", + "ax.set_ylabel(\"$f_Z$\")\n", + "savefig(ax.figure, \"figures/pdf_of_rvZ.png\")" + ] + }, + { + "cell_type": "markdown", + "id": "aecdb50b-591f-4f82-b89f-e0c9de1a7f09", + "metadata": {}, + "source": [ + "The above graph tells you everything you need to know about the random variable $Z$.\n", + "The possible values of $Z$ are concentrated around the mean $\\mu=0$.\n", + "The region of highest density is roughly between $z=-1$ and $z=1$,\n", + "with most of the values between $z=-2$ and $z=2$,\n", + "then the probability density drops off to form long tails." + ] + }, + { + "cell_type": "markdown", + "id": "33bbcbf6-d36f-41e1-865f-a015411faf91", + "metadata": {}, + "source": [ + "The above graph shows the \"shape\" of the normal distribution $\\mathcal{N}(\\mu=0, \\sigma=1)$,\n", + "which is just one representative of the general normal distribution.\n", + "Here are some examples of graphs of the normal distribution for different choices of the parameters $\\mu$ and $\\sigma$\n", + "to give you an idea of what they do.\n", + "\n", + "![normal_panel.png](./attachments/normal_panel.png)" + ] + }, + { + "cell_type": "markdown", + "id": "6cf4ed2a-9077-4091-9c1a-85c25bc08451", + "metadata": {}, + "source": [ + "There are dozens of other probability distributions that can be useful for modelling \n", + "\n", + "You can take a look at the probability distribution graphs here\n", + "\n", + "TODO links to other panels of pdfs\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab2bd740-8baf-4642-a4a6-4399d37c9db1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": 
"af748763-9483-494b-972c-5e70e655b945", + "metadata": {}, + "source": [ + "### Doing probability calculations" + ] + }, + { + "cell_type": "markdown", + "id": "5013327a-4104-4586-af4a-1cafbe20fb1f", + "metadata": {}, + "source": [ + "Calculating probabilities with the continuous random variable $Z$ requires using *integration*,\n", + "which is the process of computing the total area under a curve for some region.\n", + "For example, \n", + "the probability that the random variable $Z$ will have a value somewhere\n", + "between $a$ and $b$ is defined as $\\textrm{Pr}(\\{a \\leq Z \\leq b\\}) = \\int_{z=a}^{z=b} f_Z(z) dz$.\n", + "\n", + "In words ..." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f9ec594b-dfa7-471f-b689-ac82c586d11b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.13590512198327787" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from scipy.integrate import quad\n", + "quad(rvZ.pdf, 1, 2)[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9763d430-2b9e-4358-998a-066ea1227f71", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "image/png": { + "height": 184, + "width": 608 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# FIGURES ONLY\n", + "zs = np.linspace(-4, 4, 1000)\n", + "fZs = rvZ.pdf(zs)\n", + "ax = sns.lineplot(x=zs, y=fZs)\n", + "mask = (1 < zs) & (zs < 2)\n", + "ax.fill_between(zs[mask], y1=fZs[mask], alpha=0.6, facecolor=\"red\")\n", + "savefig(ax.figure, \"figures/pdf_of_rvZ_highlight_1_to_2.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a043f55f-1cbe-4f0b-a38f-9c19adbb11e0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "a61a9bf2-775f-43a8-8342-1514a6bfabed", + "metadata": {}, + "source": [ + "#### Discrete random variables (bonus topic)" + ] + }, + { + "cell_type": "markdown", + "id": "67dcea42-6159-40e2-925e-cd36d56c486e", + "metadata": {}, + "source": [ + "There is a whole other type of random variables called \"discrete\" random variables,\n", + "which are defined only for integers, like $0$, $1$, $2$, etc.\n", + "\n", + "For example, the Poisson random variable $H$ is defined by the probability mass function,\n", + "\n", + "$$ \n", + " f_H(h) = \\frac{\\lambda^{h}e^{-\\lambda }}{h!},\n", + "$$\n", + "\n", + "for $h$ any natural number, $0, 1, 2, 3, \\ldots$.\n", + "The parameter $\\lambda$ (the Greek letter lambda) is used to control the shape of the distribution.\n", + "This math formula includes the lambda raised to the power $h$,\n", + "the exponential function $e^x$,\n", + "and the factorial function $n!$.\n", + "That's a lot of math!\n", + "If you need to do some probability calculations for the random variable $H$,\n", + "and you're ever forced to do the calculations using only pen and paper,\n", + "that would be quite the chore!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "24b53a6f-660e-4b5f-b9e8-468adba7acd8", + "metadata": {}, + "source": [ + "Wouldn't it be simpler (and more efficient) to define a Python function\n", + "that corresponds to the math function $f_H$,\n", + "then do all the calculations using Python as a calculator?" + ] + }, + { + "cell_type": "markdown", + "id": "b0372c94-cce4-4b4d-9eed-86133e401e4e", + "metadata": {}, + "source": [ + "Let's see this in action!\n", + "We'll initialize a `poisson` model with the parameter $\\lambda=20$." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "5ddee77a-7061-4392-a886-59e15e719a3e", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import poisson\n", + "\n", + "rvH = poisson(20)\n", + "# rvH" + ] + }, + { + "cell_type": "markdown", + "id": "9649298a-31b1-4872-ab9b-c4aebda0575b", + "metadata": {}, + "source": [ + "Having defined the computer model `rvH`, we can use it to:\n", + "- generate visualizations\n", + "- compute probabilities\n", + "- run simulations\n", + "- carry out multi-step probability calculations that involve `rvH`\n", + "- anything else you might want to do with the random variable $H$" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a561d5b3-4d81-48a0-9bf7-afdd77a5fb3f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "image/png": { + "height": 201, + "width": 547 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "hs = np.arange(0,40)\n", + "fHs = rvH.pmf(hs)\n", + "plt.stem(fHs)" + ] + }, + { + "cell_type": "markdown", + "id": "9c6c2b8d-fef5-4e07-b129-38595604bb30", + "metadata": {}, + "source": [ + "Calculating the probability of $H$ being between $10$ and $20$\n", + "is done by summing over all the probabilities for that range of values of $h$.\n", + "\n", + "$$\n", + " \\textrm{Pr}(\\{10 \\leq H \\leq 20\\})\n", + " = \\sum_{h=10}^{h=20} f_H(h)\n", + " = f_H(10) + f_H(11) + f_H(12) + \\cdots + f_H(20).\n", + "$$\n", + "\n", + "This calculation can be done using a Python summation:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "76ba3d2f-a01f-4d9d-b371-c99a55171c87", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5540971719230157" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum([rvH.pmf(h) for h in range(10,20+1)])" + ] + }, + { + "cell_type": "markdown", + "id": "1de172ba-04ae-485f-adff-de6dd0fcc0f8", + "metadata": {}, + "source": [ + "To see a complete worked example based on the \n", + "see [Example 3: hard disk failures](https://minireference.com/static/excerpts/noBSstats/noBSstats_ch02_PROB.pdf#page=15) and\n", + "[Section 2.1.5 Hard disks example](https://minireference.com/static/excerpts/noBSstats/noBSstats_ch02_PROB.pdf#page=29) in the PDF preview of Chapter 2." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "305d9abd-6c0a-403c-981e-5691d5d80043", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "9aacbce4-f4e7-4d7a-a72e-5498d116e4f2", + "metadata": {}, + "source": [ + "### Running statistical simulations" + ] + }, + { + "cell_type": "markdown", + "id": "01a85daf-7975-44a4-a32f-516b04c5c4c3", + "metadata": {}, + "source": [ + "#### Sampling distributions\n", + "\n", + "The *sampling distribution* of the mean for samples\n", + "of size $n=20$ from the standard normal distribution $Z \\sim \\mathcal{N}(0,1)$\n", + "is denoted $\\overline{\\mathbf{Z}} = \\mathbf{Mean}(\\mathbf{Z})$,\n", + "where $\\mathbf{Z} = (Z_1, Z_2, \\ldots, Z_{20})$ is a *random sample*.\n", + "\n", + "The random variable $\\overline{\\mathbf{Z}}$ describes the kind of means we can expect to observe if\n", + "we compute the mean for a sample of size $n=20$ from the standard normal.\n", + "\n", + "Let's generate $N=10$ samples $\\mathbf{z}_1, \\mathbf{z}_2, \\mathbf{z}_3, \\ldots, \\mathbf{z}_{10}$ of size $n=20$\n", + "from $Z \\sim \\mathcal{N}(0,1)$, and compute the mean in each sample.\n", + "\n", + "![samples_from_rvZ_n20_w_means_n_stds.png](./attachments/samples_from_rvZ_n20_w_means_n_stds.png)\n", + "\n", + "The diamond markers indicate the position of the sample means computed from each sample:\n", + "$[\\overline{\\mathbf{z}}_1, \\overline{\\mathbf{z}}_2, \\overline{\\mathbf{z}}_3, \\ldots, \\overline{\\mathbf{z}}_{10}]$.\n", + "\n", + "Now imagine we generate 9990 more samples to obtain a total of $N=10000$ samples from the population model:\n", + "$\\mathbf{z}_1, \\mathbf{z}_2, \\mathbf{z}_3, \\ldots, \\mathbf{z}_{10000}$.\n", + "We can visualize the sampling distribution of the mean $\\overline{\\mathbf{Z}} = \\texttt{mean}(\\mathbf{Z})$\n", + "by plotting a histogram of the means computed from the $10000$ random samples,\n", + "`zbars` = $[\\overline{\\mathbf{z}}_1, 
\\overline{\\mathbf{z}}_2, \\overline{\\mathbf{z}}_3, \\ldots, \\overline{\\mathbf{z}}_{10000}]$,\n", + "where $\\overline{\\mathbf{z}}_j$ denotes the sample mean computed from the data in the $j$th sample,\n", + "$\\overline{\\mathbf{z}}_j = \\texttt{mean}(\\mathbf{z}_j)$." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "876e6c4a-6d3a-47c7-a028-30924231f60e", + "metadata": {}, + "outputs": [], + "source": [ + "def mean(sample):\n", + " total = sum(sample)\n", + " avg = total / len(sample)\n", + " return avg" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1f27f30f-dd5c-4bf1-a4e4-f73b406b812e", + "metadata": {}, + "outputs": [], + "source": [ + "zbars = []\n", + "for i in range(0, 10000):\n", + " sample = rvZ.rvs(20)\n", + " zbar = mean(sample)\n", + " zbars.append(zbar)\n", + "\n", + "# zbars[0:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "86ea33f6-2eac-4a20-9f41-7404548872c4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png":
"iVBORw0KGgoAAAANSUhEUgAABMIAAAFxCAYAAACREQTcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAB7CAAAewgFu0HU+AABKfUlEQVR4nO3de3wU1f3/8fdu7iGEEETuggZiFIEioFS0lktFUG5yB80XWmoVgyBaL6VYUC76tdqviteiSItyK4IGLPiFgIIigvIVDXf0pxJAhJCEJISE7Pz+SDNsbptNMrvZZF7Px8OHk8w5Zw772ZPZ/cycMw7DMAwBAAAAAAAA9ZyztjsAAAAAAAAA+AOJMAAAAAAAANgCiTAAAAAAAADYAokwAAAAAAAA2AKJMAAAAAAAANgCiTAAAAAAAADYAokwAAAAAAAA2AKJMAAAAAAAANgCiTAAAAAAAADYAokwAAAAAAAA2AKJMAAAAAAAANgCiTAAAAAAAADYAokwAAAAAAAA2AKJMAAAAAAAANgCiTAAAAAAAADYAokwAAAAAAAA2AKJMAAAAAAAANhCcG13AHVDfn6+MjIyzJ/DwsIUFBRUex0CAAAAAAABpbCwUOfPnzd/jomJUWhoaC32qCwSYfBKRkaGfvzxx9ruBgAAAAAAqEMuvfTS2u5CCUyNBAAAAAAAgC2QCAMAAAAAAIAtMDUSXgkLCyvxc5s2bRQZGWlJ24cPH1ZhYaGCgoLUvn17S9pE4CPu9kTc7YeY2xNxtyfibk/E3Z6Iuz15E/fc3NwSyyqVziUEAhJh8ErphfEjIyMVFRVlSdtOp1OFhYVyOp2WtYnAR9ztibjbDzG3J+JuT8Tdnoi7PRF3e6pO3APxIXtMjQQAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAECd4XIZfq0HAADql+Da7gAAAADgLafToUXJqUrPyvO6Tmx0uCYO6ujDXgEAgLqCRBgAAADqlPSsPJ3O9D4RBgAAUIypkQAAAAAAALAFEmEAAAAAAACwBRJhAAAAYBF6AABgC6wRBgAAABahBwAAtkAiDAAAAJJYhB4AANR/TI0EAAAAAACALZAIAwAAQL0WERbMGmgAAEASUyMBAABQz4WHBrEGGgAAkEQiDAAAADbBGmgAAICpkQAAAPA7phwCAIDawB1hAAAA8LvqTFVs1yJaw/t08GGvAABAfUciDAAAALWiqlMVGzcM82FvAACAHTA1EgAAAAAAALZAIgwAAAAAAAC2QCIMAADAx6qzMDyLyQMAAFiPNcIAAAB8rKoLw8dGh2vioI4+7hUAAID9kAgDAADwg6ouDA8AAADrMTUSAAAAAAAAtsAdYQAAALBUaGionE6ngoP5qAkAAAILn04AAABQLRFhwXK5DDmdjhK/T0hIqKUeAQAAeEYiDAAAANUSHhpU7oMAcs/lynAZcjgdioyILFOvXYtoDe/TwZ9dBQAAkEQiDAAAADVU+kEA2TkXE2FR+WWXpG3cMMyf3QMAADCxWD4AAEA94nIZtd0FAACAgMUdYQAAAPVIeVMVK8NURQAAYBckwgAAAOqZ0lMVK8NURQAAYBdMjQQAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAPCSy2XUdhcAAABQA8G13QEAAIC6wul0aFFyqtKz8ryu065FtIb36eDDXgEAAMBbJMIAAACqID0rT6czvU+ENW4Y5sPeAAAAoCqYGgkAAAAAAABbIBEGAAAAlCMiLLja68KxnhwAAIGJqZEAAAABpjgB43Q6arsrthYeGlStdeFio8M1cVBHH/YMAABUF4kwAACAAFPdBAwL8/tGVdeFAwAAgYtEGAAAQIBiYX4AAAB
rsUYYAAAAAAAAbIFEGAAAAAAAAGyBqZEeZGVl6bbbbtPJkyc1bNgwPfXUUxWWNQxDa9eu1apVq7Rv3z7l5uaqadOm6tGjh8aPH6/OnTtXejwr2gAAAAAAAED5SIR5MG/ePJ08ebLScnl5eZo6daq2bNlS4vdpaWlKS0tTcnKypk2bprvvvtunbQAAAAAAAKBiJMIqsGXLFq1evdqrsjNmzDATWHFxcRo1apQuueQSpaamatmyZcrNzdWzzz6rZs2aaciQIT5rAwAAAAAAABUjEVaOrKwszZw506uyn3zyidauXStJ6tmzp15//XWFhRU9sen222/XiBEjNG7cOGVkZGjevHnq27evoqKiLG8DAAAAAAAAnrFYfjmKp0RGR0dXWvbNN9+UJAUHB2vOnDlmAqtYXFycmVTLyMjQypUrfdIGAAAAAAAAPCMRVor7lMiHH37YY9mMjAx9+umnkqSbbrpJbdq0KbfcwIED1aRJE0nS+vXrLW8DAAAAAAAAlSMR5sZ9SuTo0aP1y1/+0mP5Xbt2yeVySSqa0lgRp9OpHj16SJK++uorZWZmWtoGAAAAAAAAKkcizE3xlMgWLVpUejeYJB06dMjcjo+P91i2ffv2kiTDMHTw4EFL2wAAAAAAAEDlSIT9h/uUyNmzZ3u1GH1aWpq53apVK49lmzdvXm49K9oAAAAAAABA5UiEqeSUyKFDh+rmm2/2ql56erq53bhxY49lY2JizO2MjAxL2wAAAAAAAEDlgmu7A4GgeEpk06ZN9ac//cnrenl5eeZ26Sc9lhYaGlpuPSvaqA2HDx+W02lNHrWgoMD8/549eyxpE4GPuNsTcbef+hTz0NBQJSQkKPdcrrJzcr2udy6v6Pyel3dO2Tk5PqsTSPUMl2H+PzsnO2D76at6EaFF67/u379f+fn5Xter6+rTeIf3iLs9EXd78ibuxWugBzLbJ8Lcp0TOmjVLjRo18rruhQsXzG33JFV53Pe717OijdpQWFiowsJCy9stHliwF+JuT8Tdfup6zIsvABkuw0z0eMMwDPP/3tarTp1ArVfe7wOxn5bW+0/ZCxcu1Pn3fXXZ9d9td8Tdnoi7PdXluNs6EeY+JXLgwIHq169fleqHh4eb2wUFBR4TWe5XA93LWdFGbQgKCrL8jjBJCgkJsaRNBD7ibk/E3X7qU8yDg4s+NjmcDjmcDq/rORwO8//e1qtOnUCq5540Kq+9QOmnz+r9p2xwcHCduDJulfo03uE94m5PxN2evIm7y+XyyU0zVrJ1Iqx4SmTjxo3NhFhVREZGmtvnz5/3OonlPgXSijZqQ/v27b16oIA39uzZo4KCAoWEhKhz586WtInAR9ztibjbT32MeWREpKLyvb8YFPGfi17h4RGKauBdMqU6dQKpXnZOtgyXIYfToagGZT8vBEo/fVUvMqKoXkJCgtd16oP6ON5ROeJuT8TdnryJe3Z2tg4cOODnnlWNbRfLd58SOXPmTMXGxla5jejoaHO7ssXr3fe7H8uKNgAAAAAAAFA52ybC1q9fb25Pnz5dV155ZZn/+vbta5ZZvXq1+fsXX3xRktSuXTtz//Hjxz0e78SJE+Z2y5YtzW0r2gAAAFXjqsJ6T0BVRYQFV/s9xnsTAADfsvXUyJqKi4sztw8dOqTrrruuwrKHDh2SVLTGRIcOHSxtAwAAVI3T6dCi5FSlZ3n/FOZ2LaI1vA/nX1QuPDSoWu+x2OhwTRzU0Yc9AwAAtk2E3XXXXZUujn/69Gk9/vjjkqTrr79eiYmJkqTLL79cktS1a1eFhISooKBAO3bs0Pjx48ttp7CwUDt37pRUtFaE+3RIK9oAAABVl56Vp9OZ3icpGjes3fU5UfdU9T0GAAB8z7aJsI4dO6pjR89X3I4ePWput2zZskziLDo6Wj179tTWrVuVkpKiY8eOlTtlcd26dUpPT5ckDRgwwPI2AAAAAAAAUDnbrhFmlQkTJkgqeozo9OnTlZ2dXWL/4cOHNXf
uXElSgwYNNHLkSJ+0AQAAAAAAAM9se0eYVW688Ub1799fGzZs0O7duzVkyBCNHTtWLVq00N69e7V06VLl5ORIkh5++OFyn/ZoRRsAAAAAAADwjESYBZ5++mnl5ORo27ZtOnr0qJ555pkS+x0Oh5KSkjRmzBiftgEAAAAAAICKkQizQEREhBYuXKjk5GStWbNG+/bt09mzZxUTE6Nu3bopMTFR3bp183kbAAAAAAAAqBiJMA9at26tAwcOeFXW4XBo8ODBGjx4cLWPZ0UbAADYictlyOl01HY3AAAAUEeQCAMAAHWW0+nQouRUpWfleV2nXYtoDe/TwYe9AgAAQKAiEQYAAOq09Kw8nc70PhHWuGGYD3sDAACAQOas7Q4AAAAAAAAA/kAiDAAAAAAAALZAIgwAAAAAAAC2QCIMAAAAAAAAtkAiDAAAAAAAALZAIgwAAAAAAAC2QCIMAAAAAAAAtkAiDAAAAAgAEWHBcrmMKterTh0AAOwquLY7AAAAAEAKDw2S0+nQouRUpWfleVUnNjpcEwd19HHPAACoP0iEAQAAAAEkPStPpzO9S4QBAICqYWokAAAAAAAAbIFEGAAAAAAAAGzBr1Mjd+7cKUm67LLL1KxZsyrVPXLkiLZt26acnBxNnjzZF90DAAAAAABAPebXO8LuuusuJSYm6oMPPqhy3Q0bNmj+/PlavHixD3oGAAAAAACA+q7OTI0sKCiQJOXk5NRyTwAAAAAAAFAXWT418sKFC9q9e7fHMj/88IM5TbIyhYWFSktL0zvvvCNJatSoUY37CAAAAAAAAPuxPBEWHByst956SykpKeXuNwxDy5Yt07Jly6rctsPhULdu3WraRQAAAAAAANiQT6ZGzpw5U+Hh4TIMo8R/xUr/3tv/oqOjNW3aNF90GQAAAAAAAPWcT54a2bx5c82dO1dbt24t8fvVq1fL4XCoY8eO6tChg1dtBQUFKSoqSq1bt9att96qSy65xBddBgAAAAAAQD3nk0SYJA0cOFADBw4s8bvVq1dLkm677TZNnDjRV4cGAAAAAAAAyvBZIqw8PXr0kFR0xxgAAAAAAADgT35NhP3zn//05+EAAAAAAAAAk08WywcAAAAAAAACjV/vCHNXUFCgPXv26NSpUzp//rxcLpfXdYcOHeq7jgEAAAAAAKBe8nsi7MKFC3rxxRe1ZMkS5ebmVrm+w+EgEQYAAADUkMtlyOl0+K0eAACBwO+JsPvvv1+bN2+WYRj+PjQAAABQr0SEBVc7MeV0OrQoOVXpWXle14mNDtfEQR2rfCwAAAKFXxNhGzduVEpKihyOohP1ZZddpm7duik2NlYRERH+7AoAAABQ54WHBlUrodWuRbSG9+mg9Kw8nc70vh4AAHWdXxNha9asMbfvvfde3X///WZSDAAAAED1VDWh1bhhmA97AwBA4PJrIuyrr76Sw+HQVVddpalTp/rz0AAAAAAAALA5pz8PlpGRIUm66aab/HlYAAAAAAAAwL+JsMaNG0uSwsPD/XlYAAAAAAAAwL+JsPj4eEnS4cOH/XlYAAAAAAAAwL+JsNtvv12GYSglJUU///yzPw8NAAAAAAAAm/NrImzIkCHq3r27zp07p2nTpuns2bP+PDwAAAAAAABszK9PjXQ4HFqwYIEmT56sL774Qv369dOAAQPUpUsXNWnSRBEREV6106NHDx/3FAAAAAAAAPWNXxNhXbp0kSS5XC45HA5lZmZq+fLlWr58uddtOBwO7d2711ddBAAAAAAAQD3l10TY+fPny/zOMAx/dgEAAAAAAAA25ddE2LBhw/x5OAAAAAAAAMDk10TY/Pnz/Xk4AAAAAAAAwOTXp0YCAAAAAAAAtYVEGAAAAAAAAGyBRBgAAAAAAABswa9rhCUmJta4DYfDocWLF1vQGwAAEChcLkNOp6O2uwEAAIB6zq+JsM8//1wOR/U+5BqGIUnVrg8AAAKX0+nQouRUpWfleV2nXYtoDe/TwYe9AgAAQH3
j10SYdDGhVRUOh0Nt27ZVRESED3oEAAACQXpWnk5nep8Ia9wwzIe9AQAAQH3k10TYpk2bKi1jGIbOnz+vn3/+WV9//bXefvttnThxQg0aNNDf//53NWnSxA89BQAAAAAAQH3j10RYq1atvC4bFxennj17avz48frDH/6gXbt26f7779eSJUuYHgkAAAAAAIAqC/inRkZGRup//ud/FBoaqi+//FLvvfdebXcJAAAAAAAAdVDAJ8IkqUmTJurXr58Mw9CaNWtquzsAAAAAAACog+pEIkySrrjiCknS4cOHa7knAAAAAAAAqIvqTCIsOztbknT27Nla7gkAAAAAAADqojqRCHO5XEpJSZEknhoJAAAAAACAagn4RFhOTo4ee+wxff/993I4HLr++utru0sAAAAAAACog4L9ebDHHnvM67L5+flKT0/XV199pXPnzpm/HzNmjC+6BgAAAAAAgHrOr4mw1atXy+FwVLmeYRiSpAkTJqhLly5WdwsAAPhIaGionE6ngoP9+pEDAAAAKJffP5UWJ7WqokOHDkpMTNTIkSN90CMAAGAVl8uQ03nxoldCQkIt9gYAAAAoya+JsH/84x9el3U6nYqIiFCLFi0UGxvrw14BAACrOJ0OLUpOVXpWniQp91yuDJchh9OhyIjIcuu0axGt4X06+LObAAAAsCm/JsKuu+46fx4OAADUgvSsPJ3OLEqEZedcTIRF5Zf/jJ7GDcP82T0AAADYWMA/NRIAAAAAAACwQkCsXPvDDz/oxx9/VGZmpiQpOjpabdq0Udu2bWu5ZwAAAAAAAKgvai0RlpGRobfeekurVq3SqVOnyi3TuHFjDRw4UPfee6+aNGni5x4CAAAAAACgPqmVqZE7d+7U7bffrtdee02nTp2SYRjl/peenq63335bQ4YM0aefflobXQUAAAAAAEA94fc7wvbs2aNJkyYpPz9fhmFIkmJiYpSQkKCYmBi5XC6dOXNGBw4cUFZWlgzD0KlTp3TPPfdo+fLluuqqq/zdZQAAAAAAANQDfk2EFRQUaPr06Tp//rwk6eqrr9Yf//hH/fKXvyy3/CeffKLnnntOqampys/P17Rp07R27VqFhIT4s9sAAAAAAACoB/w6NXLNmjU6evSoHA6Hbr75Zi1fvrzCJJgk9erVS8uXL9evf/1rSUWL6q9du9ZPvQUAAAAAAEB94tdE2KZNmyRJDRs21NNPP+3VnV3BwcF6+umnFR0dLUnasGGDT/sIAAAAAACA+smvibB9+/bJ4XCob9++iomJ8bpeo0aN1LdvXxmGoW+++cZ3HQQAAABQLaGhoQoLC1NoaGhtdwUAgAr5dY2wM2fOSJLatm1b5brFdTIyMqzsEgAAAAAvRYQFy+Uy5HQ6yuxLSEjwWLeiegAA+JNfE2FhYWEqKChQbm5ulesW14mMjLS6WwAAAAC8EB4aJKfToUXJqUrPyiuxL/dcrgyXIYfTociIkp/ZY6PDNXFQR392FQCAcvk1EdayZUsdOHBAO3bsqHLd4jrNmze3ulsAAAAAqiA9K0+nM0smwrJzLibCovL9ugILAABe8+sZ6vrrr5ck7dmzRykpKV7X27Rpk7766is5HA6zDQAAAAAAAKAq/JoIGz16tByOonUB/vjHP2rLli2V1tm8ebMefvhhSZLD4dDIkSN92UUAAAAAAADUU36dGhkXF6exY8fqnXfeUW5uru6991716NFD/fv3V0JCgvkkyTNnzujAgQNav369du3aJcMw5HA4NGLECMXHx/uzywAA2BKLWgMAAKA+8msiTJIeffRRpaWl6aOPPpLD4dDOnTu1c+fOCssbhiFJuuGGG/T444/7q5sAANhaRYthe9KuRbSG9+ngw14BAAAANeP3RFhoaKhefvllvfzyy3rrrbeUk5PjsXxkZKQmTJigpKQkOZ0sugkAgL+Utxi2J40bhvmwNwAAAEDN+T0RJklBQUGaMmWK7rrrLm3evFmfffaZjh49qoyMDBm
GoejoaLVr107dunVT//79FR0dXRvdBAAAAAAAQD1SK4mwYjExMRo2bJiGDRtWm90wZWZmatmyZdq8ebO+++475eTkqGHDhrryyit166236o477lBoaGiF9Q3D0Nq1a7Vq1Srt27dPubm5atq0qXr06KHx48erc+fOlfbBijYAAAAAAABQll8TYT/88IMuu+yySsu9+uqruvzyy9W7d2+PiScrbd++XdOnT1d6enqJ36enp2v79u3avn27lixZoldeeUVt2rQpUz8vL09Tp04t8yTMtLQ0paWlKTk5WdOmTdPdd99dYR+saAMAAAAAAADl80sibM2aNVqwYIFCQkL073//22PZgoICvfrqqzp//ryaNWumKVOmaPjw4T7t3/79+3Xvvffq3LlzkqQbb7xRffv2VUxMjI4dO6Y1a9bo0KFDOnTokH73u9/pX//6V5npmjNmzDATWHFxcRo1apQuueQSpaamatmyZcrNzdWzzz6rZs2aaciQIeX2w4o2AABwx9MfAQAAgIt8mgg7d+6cHnzwQW3evFmGYcjhcCg9PV2xsbEV1vm///s/5eXlyeFw6MSJE/rzn/+sjz/+WM8884zP7g6bM2eOmQSbNWuWxo4dW2L/hAkT9Oijjyo5OVnff/+9Xn75ZT366KPm/k8++URr166VJPXs2VOvv/66wsKKFgy+/fbbNWLECI0bN04ZGRmaN2+e+vbtq6ioqBLHsKINAABK4+mPAAAAwEU+ewxjYWGhJk+erM2bN5f4/cGDBz3WCwsL080336ygoCBJRWtmffjhh3rggQdkGIbl/Txy5Ih27twpSerXr1+ZJJgkBQcHa+7cubr00kslSe+++64KCwvN/W+++aZZbs6cOWYCq1hcXJxmzpwpScrIyNDKlSvLHMOKNgAAKE/x0x+9/S8z+3xtdxkAAADwCZ8lwl555RVt377d/PmOO+7Qhx9+qJ49e3qs17lzZ7322mvatGmTuYi+YRhKSUnR4sWLLe+nex89TTcMCwtT7969JRUtqv///t//k1SUlPr0008lSTfddFO564dJ0sCBA9WkSRNJ0vr160vss6INAAAAAAAAeOaTRFh6eroWLlwoSQoKCtLzzz+vefPmVZjgKU+zZs00f/58PfHEE3I4HDIMQy+++KJycnIs7avT6VSHDh0UFRWldu3aeSzbqFEjczsrK0uStGvXLrlcLknymORzOp3q0aOHJOmrr75SZmamuc+KNgAAAAAAAOCZTxJh77//vrnO10MPPaRbbrml2m2NGjVK48aNkyTl5ubq/ffft6qbkqRx48Zp7dq1+uKLLxQfH++x7OHDh83tmJgYSdKhQ4fM31VWv3379pKK7nBznyJqRRsAAABAoIoIC5bLVb1lTqpbDwCA8vhksfzPPvtMktSkSRPdddddNW5vypQpWrVqlc6fP69PPvmk3HW8fO2nn37S1q1bJUmNGzdW27ZtJUlpaWlmmVatWnlso3nz5uZ2WlqaeXeXFW0AAAAAgSo8NKhaD++IjQ7XxEEdfdgzAIDd+CQRduDAATkcDt1www3movc1ERMTo549e2rLli3au3evBT2suqeffloFBQWSpNtuu01OZ9HNdOnp6WaZxo0be2yj+C4yqWhdsGJWtAEAAAAEuuKHdwAAUFt8MjWyOEFTfNeUFa688kpJ0pkzZyxr01vLli3TunXrJEmRkZG6++67zX15eRdP5KWf9FhaaGhoufWsaAMAAAAAAACe+eSOsAsXLkiSQkJCLGuzQYMGkmTeleUvGzdu1BNPPGH+PGvWLDVr1sz8ufjfKpVMUpXHfb97PSva8LfDhw+bd8XVVHFMCwoKtGfPHkvaROAj7vZE3P0rNDRUCQkJyj2Xq+ycXK/rncsruiiTl3dO2VV4SE159Yz/rO1juAxl52T7/Hj1pV5d6KOnepXFPVD6WR/qBVIfPcW9useLCC16oNT+/fuVn5/vdT34D+d2eyLu9uRN3IsfBBjIfJIIi46
OVnp6uqV3bxU/pbE4IeYPH374oaZPn67CwkJJUmJiooYMGVKiTHh4uLldUFDgMZHlfvJ2L2dFG/5WWFhovi5W8neiE4GBuNsTcfe94gsWhsswv6B6wzAM8/9W1quoLV8dry7Xqwt99LZeue+FAOxnXa0XqH0sva/ax/tP2QsXLnDeqAOIkT0Rd3uqy3H3SSLs8ssv1+nTpy1dz+ubb76RVHKxeF9auXKlZs2aZd51NWzYMP3pT38qUy4yMtLcPn/+vNdJLPcpkFa04W9BQUGW3xEmWXsXIQIbcbcn4u5fwcFFp3mH0yGH0+F1PYfDYf6/pvXcv/BW1JaVx6sv9epCHz3VqyzugdLP+lAvkProKe7VPt5/ygYHB9eJuwzsiHO7PRF3e/Im7i6Xyyc3zVjJJ4mwrl27ateuXfriiy90+vRpNWnSpEbtnTx5Urt27ZLD4VBCQoJFvazYggUL9OKLL5o/Dx8+XHPmzDFP4O6io6PN7YyMDDVs2LDCdt0Xt4+NjbW0DX9r3769oqKiLGlrz549KigoUEhIiDp37mxJmwh8xN2eiHvtiIyIVFS+9xcvIv5zp3J4eISiGnj/hbW8etk52TJchhxOh6IalH/esPJ49aVeXeijp3qVxT1Q+lkf6gVSHz3FvbrHi4woqueP7wCoHs7t9kTc7cmbuGdnZ+vAgQN+7lnV+GSx/FtuuUVS0fS5BQsW1Li9BQsWmHdm3XzzzTVuryIul0uPP/54iSRYYmKi5s6dW+HdT+3atTO3jx8/7rH9EydOmNstW7a0tA0AAAAAAAB45pNEWKdOnfSLX/xChmFo2bJlSk5OrnZb7733nlasWCGHw6EmTZqob9++Fvb0IpfLpYcffljLly83fzd16lTNmDGj3DvBisXFxZnbhw4d8niM4v0Oh0MdOnSwtA0AAAAAAAB45pNEmCQ9/PDDZgLpkUce0QsvvFClJ73k5eXpb3/7W4l1uaZOneqzdbFmzZplJuycTqdmzZqlyZMnV1qva9eu5tzYHTt2VFiusLBQO3fulFR0a7f7dEgr2gAAAAAAAIBnPkuEXXvttbr33nuLngxjGHrllVfUu3dvzZ07V5s2bSrzREnDMHTq1Clt2LBBjz/+uHr37q3XX3/dXGTt9ttv18iRI33S13fffde8E8zpdGr+/PkaO3asV3Wjo6PVs2dPSVJKSoqOHTtWbrl169YpPT1dkjRgwADL2wAAAAAAAIBnPlksv9j999+vc+fOadGiRXI4HDp9+rSWLFmiJUuWSCp6ykBMTIzy8/OVlZVlPlZZUontkSNHavbs2T7p46lTpzR37lzz50ceeURDhw6tUhsTJkzQ1q1bVVBQoOnTp2vhwoUlFpI/fPiweYwGDRqUm9Czog0AQP3lchlyVuFJawAAAADK8mkiTCpKLHXp0kXz5s3TyZMnZRiGHA6HDMNQfn6+Tp48WWHdyy67TA899JC5+L4vLF68WNnZ2ZKKFp9v3bq1Nm7cWGm9q6++2lys/sYbb1T//v21YcMG7d69W0OGDNHYsWPVokUL7d27V0uXLlVOTo6koimj5T3t0Yo2AAD1l9Pp0KLkVKVn5Xldp12LaA3vw3qSAAAAQDGfJ8Ik6dZbb1WfPn20Zs0aJScna/fu3eZTIEtr1KiRrr/+eg0ePFi9e/dWUFCQT/u2evVqc/vYsWO67777vKo3f/583XHHHebPTz/9tHJycrRt2zYdPXpUzzzzTInyDodDSUlJGjNmTIVtWtEGAKD+Ss/K0+lM7xNhjRv6Zl1NAAAAoK7ySyJMkkJDQzVq1CiNGjVK58+f15EjR3T8+HHl5uYqKChIDRs2VNu2bdWmTRuPT2m0Unp6un7++WdL2oqIiNDChQuVnJysNWvWaN++fTp79qxiYmLUrVs3JSYmqlu3bj5vAwAAAAAAAOXzWyLMXVhYmK6++mpdffXVtXF4U2xsrA4cOGBZew6HQ4MHD9bgwYNrtQ0AAAAAAACU5bOnRgI
AAAAAAACBhEQYAAAAAAAAbIFEGAAAAAAAAGyBRBgAAAAAAABsgUQYAAAAAAAAbIFEGAAAAAAAAGyBRBgAAAAAAABsgUQYAAAAAAAAbIFEGAAAAAAAAGyBRBgAAAAAAABsgUQYAAAAgHrH5TL8Wg8AUDcE13YHAAAAAMBqTqdDi5JTlZ6V53Wd2OhwTRzU0Ye9AgDUNhJhAAAAAOql9Kw8nc70PhEGAKj/mBoJAAAAAAAAWyARBgAAACAgRYQFs2YXAMBSTI0EAAAAEJDCQ4OqtdZXuxbRGt6ngw97BgCoq0iEAQAAAAhoVV3rq3HDMB/2BgBQlzE1EgAAAAAAALZAIgwAAD9jvRsAAACgdjA1EgAAP6vqejesdQMAAABYg0QYAAC1oCrr3bDWDQAAAGANpkYCAAAAAADAFkiEAQAAAAAAwBZIhAEAAAAAAMAWSIQBAAAAAADAFkiEAQAAAICkiLBguVxGtepWtx4AwL94aiQAAAAASAoPDZLT6dCi5FSlZ3n3ZF9Jio0O18RBHX3YMwCAVUiEAQAAAICb9Kw8nc70PhEGAKg7mBoJAAAAAAAAWyARBgAAAAAAAFsgEQYAAAAAAABbIBEGAAAAAAAAWyARBgAAAAAAAFsgEQYAAAAAAABbIBEGAAAAAAAAWyARBgAAAAAAAFsgEQYAAAAAAABbIBEGAAAAAAAAWyARBgAAAAA1EBEWLJfLqFbd6tYDAFRPcG13AAAAAADqsvDQIDmdDi1KTlV6Vp7X9Vpe0kCjf3NltY7pchlyOh3VqgsAdkYiDACAauJLCADAXXpWnk5nep8Ia9wwrFoJtNjocE0c1LE6XQQA2yMRBgBANVXny0u7FtEa3qeDD3sFAKhrqppAAwBUH4kwAABqoDpX/wEAAADUDhbLBwDYHgsVAwAAAPbAHWEAANtjiiMAAABgDyTCAAAQUxwBAAAAO2BqJAAAAAAAAGyBRBgAAAAAAABsgUQYAAAAAAAAbIFEGACg3uDpjwAAAAA8YbF8AEC9wdMfAQAAAHhCIgwAUK/w9EcAAAAAFWFqJAAAAAAAAGyBRBgAAAAAAABsgUQYAAAAANQhEWHB1XpADA+VAQDWCAMAAACAOiU8NKjKD4iJjQ7XxEEdfdwzAAh8JMIAAAAAoA6q6gNiAABMjQQAAAAAAIBNkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAEHJfLqO0uAAAAAKiHgmu7AwAAlOZ0OrQoOVXpWd4/Er5di2gN79PBh70CAAAAUNeRCAMABKT0rDydzvQ+Eda4YZgPewMAAACgPmBqJAAAAAAAAGyBRBgAAAAAAABsgUQYAAAAANRzEWHB1X4YDQ+xAVCfsEYYAAAAANRz4aFB1XoYTWx0uCYO6ljh/tDQUDmdTgUH89USQN3AXysAAAAAsImqPoym+E4yp9NR7v6EhIQK63qqBwC1hUQYAMBvuGoMAEDdUtmdZLnncmW4DDmcDkVGRJq/r+xOMgCoLXwTAQD4TOkrwZ6uGgMAgMBV0Z1k2TkXE2FR+SxBDSDwkQgDAPhM6SvIFV01dteuRbSG9+ngz24CAAAAsAkSYQAAn3K/guzNVePGDcP82T0AAAAANsK9qwCASvHYdAAAAAD1AXeEAQAqVZ3HrTPFEQAAAECgIREGAPBKVR+3zhRHAABQHaUftuPregDshUQYANgIHxABAECgq86d6LHR4Zo4qKMPewWgviARBgA2whRHAABQF1T1TnQA8BaJsABjGIbWrl2rVatWad++fcrNzVXTpk3Vo0cPjR8/Xp07d67tLgKo45jiCAAAAMCuSIQFkLy8PE2dOlVbtmwp8fu0tDSlpaUpOTlZ06ZN09133107HQQAAAAAAKjDSIQFkBkzZphJsLi4OI0aNUqXXHKJUlNTtWzZMuXm5urZZ59Vs2bNNGTIkNrtLAAAAABUICIsmLVJAQQkEmEB4pNPPtHatWslST1
79tTrr7+usLCi6Ui33367RowYoXHjxikjI0Pz5s1T3759FRUVVZtdBlCL+GAJAAACWXhoEGuTAghIJMICxJtvvilJCg4O1pw5c8wkWLG4uDjNnDlTDz74oDIyMrRy5UpNnDixNroKwELVTWjxwRIAANQFrE0KINCQCAsAGRkZ+vTTTyVJN910k9q0aVNuuYEDB2revHk6ffq01q9fTyIMqAdqktDigyUAAEDtqe4FTe7sB2oXibAAsGvXLrlcLklF0yIr4nQ61aNHD61fv15fffWVMjMz1ahRI391E4CPkNACAAComZqsSVboMhTkpzv0Y6PDNXFQxyofC4B1SIQFgEOHDpnb8fHxHsu2b99ekmQYhg4ePKgePXr4tG8AvMfVPQAAgNpR0zXJ/HWHPoDaRyIsAKSlpZnbrVq18li2efPmJeqRCAOs5691u1izCwAAwFrVvdOeO/QB+yARFgDS09PN7caNG3ssGxMTY25nZGT4qEeAvflr3S4+QAEAANiLv6dw+mrGQmhoqJxOp4KDSSmg7uFdGwDy8i5+aS79tMjSQkNDy63na4WFhSV+zs3Ntazt4vXRXC6XsrOzLWsXvmPFwqBBQUHm/wMx7oUFeVLhea/LX8g/p+zsbDWKkFTo3WvTINRV5Tp1vV5kcLAMlyGH06GI8PLbCoR+Blq9utDHiuoRc/vFXKo87oHSz/pQL5D66CnugdRPu9bz1bEqintdeE1qo16TKIdyc3O07pPvdDYn3+t6zZtE6tfd2lSpXsMGobqt1+VeH8NdZZ/3W7dubW67f5ZnqZD6zZvv7qVzBaVzCYHAYRiGUdudsLvExETt2LFDkrR//345HBX/4di+fbsmTJggSZoyZYqSkpL80UWdPHlSP/74o1+OBQAAAAAA6r42bdro0ksvre1ulOCs7Q5ACg8PN7cLCgo8ls3Pv5j9d787DAAAAAAAAJ6RCAsAkZGR5vb5856nYrknwiqbRgkAAAAAAICLWCMsAERHR5vbGRkZatiwYYVl3RfIj42N9WW3SnBfpF8qSsIVr/EEAAAAAABQWFhY4gaf0rmEQEAiLAC0a9fO3D5+/LjatGlTYdkTJ06Y2y1btvRlt0oIDQ0NuHm9AAAAAAAAVcHUyAAQFxdnbh86dMhj2eL9DodDHTp08Gm/AAAAAAAA6hMSYQGga9euCgkJkSTz6ZHlKSws1M6dOyVJCQkJJaZUAgAAAAAAwDMSYQEgOjpaPXv2lCSlpKTo2LFj5ZZbt26d0tPTJUkDBgzwW/8AAAAAAADqAxJhAWLChAmSpIKCAk2fPl3Z2dkl9h8+fFhz586VJDVo0EAjR470dxcBAAAAAADqNIdhGEZtdwJF7r//fm3YsEGS1Lp1a40dO1YtWrTQ3r17tXTpUuXk5EiSZs+erTFjxtRmVwEAAAAAAOocEmEB5Ny5c0pKStK2bdvK3e9wOJSUlKSkpCQ/9wwAAAAAAKDuIxEWYAzDUHJystasWaN9+/bp7NmziomJUbdu3ZSYmKhu3brVdhcBAAAAAADqJBJhAAAAAAAAsAUWywcAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALQTXdgdQP/3hD3/Qli1bNH/+fN1xxx2WtHnw4EEtWrRIO3bs0MmTJ9WwYUPFxcVp6NChGjZsmIKCgvzSBooUFBRoxYoVSk5O1qFDh1RQUKDmzZurV69euuuuu3TFFVdUq90dO3YoMTGxyvVatWqllJSUMn3s2rWrCgoKKq0fGhqqr7/+usrHtRtfxd29fati9uWXX+of//iHvvjiC505c0YxMTFKSEjQyJEj1b9//xr10058HfPiY6xbt04ffPCB9u7dq4y
MDIWHh+uyyy7Tr3/9a915552KjY31WJ+xXjWGYWjt2rVatWqV9u3bp9zcXDVt2lQ9evTQ+PHj1blz5xofg/N24PF13A3D0KZNm/Tee+9pz549Sk9PV0hIiFq1aqVevXopMTFRLVu29NhGnz59lJaW5tXxtm3bpqZNm9aoz/Wdr2NuVbwY69byRdyPHj2qvn37Vqs/Bw4cKPM7xrp/zJ49W++8846SkpI0ZcqUGreXlpamRYsWaevWrTp27JgiIiLUtm1b3XbbbRozZozCw8P90kZNOQzDMHx+FNjKu+++q8cee0ySLEuErVy5UrNnz67wS0737t310ksvKSYmxqdtoMiZM2f0+9//vsIvk+Hh4Zo9e7aGDh1a5barmwhr3bq1Nm3aVOJ3Bw4c0ODBg72qz5fjyvky7sWsitmCBQu0YMECVXSKu+WWW/Tss88qNDS02n21A3/EPC0tTVOmTFFqamqFZaKjo/W3v/1NN954Y7n7GetVk5eXp6lTp2rLli3l7g8KCtK0adN09913V/sYnLcDj6/jnpmZqQceeECffPJJhWVCQ0P15JNPVvg3Izs7W927d6/wb3dpfDn2zNcxtypejHVr+Sru1U2EORwO7d+/v8TvGOv+sX37dv32t7+Vy+WyJBH20Ucf6YEHHlBOTk65++Pj4/XKK6+odevWPm3DCtwRBkt99NFHevzxxy1tc+vWrZo5c6YMw1BERITGjBmja665RqdPn9a//vUvHTx4ULt27dL06dO1cOFCOZ1lZ/xa0QaKFBYWKikpyfwi2aVLFw0dOlRRUVH64osvtGrVKuXl5WnGjBlq0aKFrr/++iq136FDB7300kuVljMMQ3PnztXx48clqdw/7O5Xn2bMmOHxKjQx98zXcS9mRcxWrFihF198UZIUExOjcePGqX379kpLS9OyZcuUlpamDz/8ULNnz9bcuXOr1U878EfMc3JyNGnSJH377beSpPbt22vo0KFq1aqVMjMztWnTJm3dulVZWVmaPHmy3n77bXXq1KlMO4z1qpkxY4b5BSkuLk6jRo3SJZdcotTUVC1btky5ubl69tln1axZMw0ZMqTK7XPeDky+jLvL5dJ9992nnTt3SpJatmyp4cOH64orrlBOTo62bdumDz/8UPn5+Xr00UcVHR2tPn36lGnnwIED5hfje+65p9zx7q5Ro0ZV6qfd+HqsWxEvxrr1fBX3Jk2aePUZXZJefvll8wJXRZ/RGeu+9c033ygpKUkul8uS9g4cOKApU6bo/PnzCgoK0ogRI9S9e3fl5OTovffe0+7du3Xw4EFNnjxZy5cvV0REhE/asIwBWGTVqlXGNddcY8THx5v/rVq1qkZtnj9/3ujTp48RHx9v/OIXvzC++eabMvvvu+8+83jJyck+aQMXLV++3HytHnroIaOwsLDE/s8//9zo1KmTER8fbwwYMKDMfqu88sorZj/+8pe/lFvmv//7v434+HgjISHBOHfunE/6YRf+intNY5aenm5ce+21Rnx8vNGrVy/j6NGjJfafPXvWGD16tPlv+fLLL6vVTzvwR8yff/558xgPPvigUVBQUKbM22+/bZYZNmxYue0w1r23bds28/VMTEw08vLySuw/fPiwcd111xnx8fHGddddZ5w9e7ZK7XPeDky+jvu//vWvEu3n5OSUKbNp0ybj6quvNuLj442bbrqpTB8Mo+R4//bbb6v2j0QJvo65YdQ8Xox16/kj7pV5//33zT78/ve/N1wuV5kyjHXf2rJli9G9e/cS38tfeOGFGrU5ZswYIz4+3rjqqquMLVu2lNjncrmMJ554wjzWq6++6rM2rEJKHTV29uxZPf7443rssceUn59vadsffvihjh49KkmaNGmSOnbsWGJ/aGionn76afNW6ddee80nbeCiN998U1LRlZm//OUvZa7M9ejRQ/fdd58k6ciRI9q4caPlfdi/f78WLFggSWrXrp0eeeSRcssV3yVy2WWX+WWueX3mr7j
XNGYrVqxQdna2JOmhhx5Sq1atSuyPiorSc889p5CQEEmMd098HXPDMLRixQpJUtOmTTV37lwFB5e9UX3cuHH6zW9+I0lKTU3V3r17y5RhrHuvOK7BwcGaM2eOwsLCSuyPi4vTzJkzJUkZGRlauXJlldrnvB2YfB335cuXS5LCwsL017/+VZGRkWXK9OnTR3feeack6aefftLHH39cpkzxWA4LC1Pbtm2r1AeU5OuYSzWPF2Pdev6Iuyc//fSTnnjiCUlS48aNNW/ePDkcjjLlGOu+kZ+frxdeeEH33HOPsrKyLGt39+7d+vLLLyVJQ4cO1c0331xiv8Ph0IwZMxQXFyep6H1YeqqzFW1YiUQYamTt2rXq37+/+QEoNjZWY8aMsaz9Dz74QFLRwBg7dmy5ZRo0aGCuQ3bw4EF99913lreBIvv27TNfm0GDBikqKqrccmPHjjUXNV2/fr2lfTAMQzNnzjT/MD7xxBMV3jZbfJKNj4+3tA9248+41zRmxeM9OjpagwYNKrdMy5YtzSk5n3zyiZk4w0X+iPmhQ4f0888/S5JuvfXWMh/W3d16663m9ldffVVmP2PdOxkZGfr0008lSTfddJPatGlTbrmBAweqSZMmkqoeV87bgcfXcc/OztaePXskSTfccIPHdXy8Hcvt27dnClwN+GOsSzWPF2PdWv6Kuydz5841EzCPPPKILrnkknLLMdat9+mnn2rAgAF66aWX5HK5FBkZqYkTJ1rSdvFYlYouUJbH6XSa+zIyMvTZZ59Z3oaVeNehRpYvX67Tp09Lkm688UatXr1aXbp0saz94rUmrrzySo9PDOvZs6e5XfoKoxVtoMjnn39ubv/yl7+ssFx0dLSuvvpqSUWLW1pp1apV5gfuQYMGVbgu0ZkzZ3Ty5ElJfDmuKX/FvaYxy8zMND9Y9ejRw+MTporfN/n5+T49ydZV/oh5Tk6OOnbsqNjYWF1++eUey7qvDVL6Cidj3Xu7du0y1wpxP+eV5nQ61aNHD0lFyYrMzEyvj8F5O/D4Ou5nzpxRp06d1LRp00qfIutpLBuGoUOHDkliLNeUP8a6FfFirFvLH3H3ZPv27dqwYYMkqVu3bho2bFi55RjrvvH++++bd1hec801WrlypXr37m1J28VjtVGjRubnvvK4v++2bt1qeRtWYrF81FirVq304IMP6rbbbrO03Z9++sn8kNShQwePZYtvoZRU4qkkVrSBi4pPWlLlr2f79u319ddfKzMzU8eOHav0ceneOH/+vF544QVJRbdST58+vcKyBw8eLNPX9PR0HTx4UAUFBWrevLnat29f7u3aKMlfca9pzA4fPmwuvOpNP4vt379f/fr187qfduCPmHft2lXvvvuuV2WPHDlibpd+ahhj3Xvuca3sy0fxGDEMQwcPHjS/NHnCeTsw+Trubdq08Xp6laexnJaWZt6hWxz77Oxs7d+/Xzk5Obr00ksVHx/v8SIHivg65lLN48VYt54/4u7JX//6V3P70UcfrbAcY913YmNjlZSUpDFjxigoKMi8YaUmCgsLzb/dcXFxHu/gu/zyyxUUFKTCwsISY9WKNqxGIgw1Mn36dHXq1KncdV1qqjijLanMOj+lNWvWTE6nUy6XS8eOHbO0DVyUlpYmqehKUosWLTyWbd68ubltVSJs2bJl+umnnyRJY8aM8dim+1PksrOzNWHCBH322WclHtPcrFkzTZo0SePHj+dk64G/4l7TmBX3U1Klj1x2/3cw3suq7bHuzjAMrVq1yvy59F3HjHXvuY+Rys6J7nFNS0vz6ksS5+3A5Ou4V4W3Y9npdCopKUkpKSkqLCw0fx8TE6M777xTv//971kP0AN/xLym8WKsW682x/rGjRv1zTffSJL69eunzp07V1iWse4b48eP16xZsyx/vU6dOmWuA17Z+yooKEhNmzbViRMnSoxVK9qwGlMjUSNdu3b1SRJMKrrVvljjxo09lg0JCTEXZs3IyLC0DVyUnp4
uqWi9htDQUI9l3a/0WvF6ulwu/eMf/5BUtADohAkTPJZ3P8nOmDFD27dvL/HFWCq6Gjl37lxNnjxZeXl5Ne5jfeWvuNc0ZsX9lCof7+7TcxjvZdXmWC9t+fLl5l1f8fHxSkhIKLGfse69qoyR6sSV83Zg8nXcvbV161Zt3rxZktSkSRP16tWrxH73sfzUU0/pf//3f0t8MS7u04IFC3TXXXeV+HehJH/EvKbxYqxbrzbH+ltvvWVuT5o0yWNZxrpvdOrUySdJw6q8r6SLn7Hd31dWtGE1EmEIWOfOnTO3PS2iXLqM+5ccK9rARcWvizevpfuXZ/c4VFdKSop59fDWW2+t9K4T95NsgwYN9OCDD2rjxo36+uuvlZKSokcffVQNGzaUJG3ZskWzZs2qcR/rK3/FvaYxcx+3lSVv3P8tjPeyanOsu9u3b5/mz59v/vzAAw+UKcNY9577e72y2LrH1dsxwnk7MPk67t44fvx4iSc833vvvWW+sLmP5ZCQEN1999364IMP9PXXX2vr1q2aM2eOuRD/nj179MADD5jrIaEkf8S8pvFirFuvtsb6/v37zfWfunbtqq5du3osz1ivW+rruZ2pkTa0efNm3XPPPdWqu2nTpkqnHFnF/cpAZV9q3ctcuHDB0jbqCyviXvy6VOW1lFTmKk91/POf/zS3f/e731Vavnjx7JiYGC1durTEAr6tWrXSxIkT1atXL40ZM0Y5OTlavXq1Ro8eXenJu66pS3Gvaczcx21lfXXfX9/Ge12KuSfff/+9Jk2aZH4IGjFihPm0T3eMde/5eoxw3g5Mtf23MT09Xb/97W/NtWp69eqlO++8s0y54qUPwsLC9Oabb6p79+7mvksvvVQjR47Ur371K40aNUonTpzQZ599pg8++EC33367Jf2sT/wR85rGi7Fuvdoa68UzNqTK7waTGOt1TX09t3NHGAKWe7a4oKCg0vLF845DQkIsbQMXFV+9rcprKdX89fz+++/NJ/t169bN45NGin388cfasWOHkpOTK3yKVXx8vO6//37z5xUrVtSon/WVv+Je05i5311QWV+tfH/WR7U11osdOXJEd955p06dOiVJ+sUvfqGZM2eWW5ax7r3qjhFvPrRKnLcDla/j7snPP/+sxMREffvtt5Kktm3b6q9//Wu5D69YtmyZvvjiC23YsKHEF2N3zZo105///GfzZ7uO5cr4I+Y1jRdj3Xq1MdZzcnK0du1aSVLLli3LvWBVGmO9bqmv53buCLOhyy67TL/97W+rVbd4eok/FK8FIBU9LbAyxWXcTwJWtFFfWBH34tfTm9fS/QTrzS2wnmzcuNHcvvXWW72uV/qJVOUZPHiwOfXqiy++qHLfAl1di3tNYlaV8e6+v76N97oW89L27NmjP/zhD+Z6EgkJCXr99dc9xomx7p3SY8TTl5/qxJXzdmDyddwr8sMPP2jSpEn6/vvvJRU9pGTRokWKjY2tsE5UVJSioqI8ttunTx9FRUUpOztbu3fvlsvl8vgEMjvyV8xrEi/GuvVqY6xv3brVjM0tt9zi9VhkrNcdDRo0MLerO1ataMNqJMJsKC4ursQ6DYEqOjra3M7MzPRYtqCgQLm5uZJU4gOWFW3UF1bEvfj1zM7O1oULFzw+KMF9ccOavp4pKSmSJIfDof79+9eordJiY2MVExOjjIwMc4pVfVKX416RimJWlfHuvr++jfe6HPOPPvpI06ZNM/8Wd+zYUW+88UaJhxtUV30f695wHyMZGRkeL25VJ66ctwOTr+Nenj179uiee+4xp0O2atVKixcvrvRpYd4ICgpS27ZtlZqaqvz8fGVkZBD/Umoj5hWpKF6MdevVRtyLP6NLsvwzOmM9MLi/jyobq+5l3GNlRRtWI6WKgNWuXTtz+/jx4x7L/vTTT+Yiiu6LqFvRBi4qfj1dLpc5v78iJ06cMLdr8sH3zJkz2r17t6Si6VHNmjWrdlsV4TZ7z2oj7pUpL2ZVGe/u+xn
vZdVGzN99911NnjzZ/LLTvXt3LV682KunC3nL7mO9KmPEPa7ejhHO24HJ13EvbevWrfqv//ovMwnWvn17LV26VG3atKlWe+Wx+1iujL9jXpmanrMZ697xd9wLCwv10UcfSSqayuiLtTcZ67Xv0ksvNe/oOnbsmMeyhYWF+vnnnyWVfF9Z0YbVSIQhYDVu3FhNmjSRJB06dMhj2cOHD5vb8fHxlraBi+Li4sxt99erPMWvd6NGjWqUvNq+fbu5wGLfvn29qrN//34tWrRIzzzzjJlEq0jxFSZJ5hNqUJI/4m5FzK644gpz3RnGe834e6wvXbpUf/rTn8xFUXv37q033nij0un4jPWqcY9rZWOkeL/D4VCHDh28ap/zdmDyddzdpaSk6N577zUT2l26dNGSJUsq/dvw448/avHixXruuee0ZcuWSo9TnKAPCQmx5I7R+sbXMbciXox16/lzrEvSN998Y55X+/TpU+7af6Ux1uum4vfWkSNHPJb79ttvze9tpceqFW1YiUQYAlqPHj0kSfv27dPZs2crLFe8kLokXXfddZa3gSLur4v761VaVlaW9u3bJ+ni619dX375pbndpUsXr+ocPHhQTz31lBYuXKh169Z5LPv555+bizZ6277d+CPuVsQsMjJS11xzjSRp165dHh+1XfzvcDqdFS7Uamf+HOtr167V7NmzZRiGJGno0KFasGCBV+tCMNarpmvXrubV9R07dlRYrrCwUDt37pRUtEab+3SbynDeDjz+iLtUNMamTp1qjrNevXrprbfe8uquzpMnT2revHl67bXXtHz5co9lv/vuO/Nul06dOikoKKhK/bQDX8fcqngx1q3lr7FezP0ClLfnVcZ63VQ8Vk+fPu0xyeo+Vkt/LrSiDSuRCENAK55rfuHCBS1durTcMtnZ2Vq9erUk6fLLL1dCQoLlbaBI+/btzaeyrV69Wjk5OeWWe/vtt81M/oABA2p0zG+++UZS0RUrb54WKUnXX3+9uZjmunXrlJ2dXW45wzD02muvmT8PGTKkRn2tr/wRd6tidsstt0iSTp06pX//+9/ltnH06FFt3rxZknTDDTdwhbEc/hrrR44c0Z/+9CczCTZ69Gg99dRTHtckc8dYr5ro6Gj17NlTUtGdOxVNT1i3bp35sIKqxpXzduDxR9xPnz6tqVOnmgtw9+7dW6+++mqJxbs9ueaaa8yFs7du3epx6swrr7xibtt1LFfG1zG3Kl6MdWv5Y6y7+/rrr83tjh07elWHsV43ua//9s9//rPcMoWFhXrnnXckFa0JduONN1rehpVIhCGg9evXT61bt5YkvfTSS9q1a1eJ/fn5+Xr44YfN23InTpzokzZwUfHrc+bMGT366KMlnjojSTt37jRPXK1atTITE9VVfDt8mzZtKn26TLFmzZrpN7/5jSQpPT1djz32WJl+ulwuzZ07V59//rmkoisOvvxjW9f5Ou5WxWzEiBHmdLonn3yyzO3X2dnZmj59unnHAuO9Yv4Y64899pj5ZKB+/fpp9uzZXk2tKMZYr7oJEyZIKlp8evr06WWSh4cPH9bcuXMlFT3laeTIkVVqn/N2YPJ13J988knzi3Xnzp31/PPPe3xiXWlhYWEaNWqU2ccHH3yw3MT2G2+8offee09S0XpIw4YNq1I/7cSXMbcqXox16/l6rJduSyqatug+LdMTxnrd1KVLF3MNuJUrV5a52GwYhubMmaNvv/1WkjRu3Lgyd/Zb0YaVHEbxZVjAIu+++64ee+wxSdL8+fN1xx13VFh2x44dSkxMlFR0q3N52eGUlBRNnjxZhmEoJCREI0eO1LXXXquMjAytWLFCBw8elCRde+21WrJkSbm3zVrRBoq4XC6NHj1ae/bskSRdeeWVGjlypGJiYvTll19q5cqVKigokMPh0Ouvv65f/epXZdpwf48MGzZMTz31VLnHOnPmjHllq2vXrlq2bJnX/Tx27JhGjhypU6dOSSpaP2rEiBFq2bK
lTpw4oTVr1mj//v2SitYLsnoh3/rGH3G3KmZLlizRk08+KanoQ964ceN01VVX6fjx41q6dKmOHj0qSbrtttv03HPPWfQK1T++jvnWrVs1adIkSUVPhnriiScUExNTab9atGhR4sozY73q7r//fm3YsEGS1Lp1a40dO1YtWrTQ3r17tXTpUvMOwNmzZ2vMmDEl6nLerrt8FfcjR47otttuM+/sfPjhh9W2bdtK+xMTE1NiavrZs2c1cuRIfffdd5Kk5s2ba/To0WrXrp3OnDmjDz74wEyUNGjQQIsXL1anTp2q+3LYgi/HulXxYqxbz9d/44t17dpVubm5atq0qbZt2+Z1/xjr/uMez6SkJE2ZMqXcckePHjXXYm7VqlWJp4EWS01N1ejRo83PfoMGDdKNN96ovLw8rVmzxlzOpl27dnr33XfNxfGtbsMqJMJgOasTYVLRQspz58417+IorVOnTvr73//ucR0KK9pAkfT0dE2aNEmpqanl7g8JCdFf/vKXCq8yeZsI279/v3kr9E033aSFCxdWqZ8HDhzQlClT9P3331dYpkOHDnr++ee9vpJlZ/6Iu1Ux+9vf/qbXXntNFZ3ibr75Zr3wwgs+vdJUH/gy5tOnT690Xa/ylPfeYaxXzblz55SUlFThFxeHw6GkpCQlJSWV2cd5u+7yVdyfffZZvf7661XuT3nvn+PHjyspKclcFqE8LVq00LPPPqtu3bpV+Zh24+uxblW8GOvW8sff+OzsbDOmV1xxRYXLUVSEse4fVibCJGnjxo364x//aD4QpbS2bdvqjTfe8HjB0Yo2rODdIhxALRs7dqy6d++uxYsX69NPP9XPP/+skJAQxcfHa9CgQRo1alSlj9e1og0UiY2N1YoVK7R8+XKtXbtWR44cMa8IXX/99Zo4caKuvPLKGh/HfV2i6izkeeWVV+r999/XqlWrtH79eh08eFA5OTmKiYlR+/btNWDAAA0bNqxK0zfszB9xtypmDzzwgG6++Wa9/fbb2rVrl06fPq2IiAhdddVVGj58uAYPHlylKXh25cuYF1/ltwJjvWoiIiK0cOFCJScna82aNeZC1TExMerWrZsSExNr/MWD83bg8VXcrRzLLVq0MP/erFu3TqmpqcrKylJUVJTatWunW265RaNHj/bpXQL1ia/HulXxYqxbyx9/42v6GZ2xXjf169dP69at06JFi/Txxx/rxIkTcjgcuvzyy9W/f3/dddddlcbMijaswB1hAAAAAAAAsAUWywcAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAtkAgDAAAAAACALZAIAwAAAAAAgC2QCAMAAAAAAIAt/H/pfqdf4GpPFQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "image/png": { + "height": 184, + "width": 609 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = sns.histplot(zbars)\n", + "\n", + "savefig(plt.gcf(), \"figures/hist_sampling_dist_mean_rvZ_n20.png\")" + ] + }, + { + "cell_type": "markdown", + "id": "b47e68a3-ac05-4741-a4b6-9a134602d002", + "metadata": {}, + "source": [ + "The above figure shows the sampling distribution of the mean for samples of size $n=20$ from the standard normal.\n", + "The histogram shows the \"density of diamond shapes,\"\n", + "and provides a representation of the sampling distribution of the mean $\\overline{\\mathbf{Z}} = \\mathbf{Mean}(\\mathbf{Z})$." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53ca2c67-5dfd-4f11-b09d-79df63f415a3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "cf0b9b93-94be-44a5-bc84-ec3a1b51cea8", + "metadata": {}, + "source": [ + "#### Verifying p-values" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3cbe13fa-2b49-44d9-b1e1-5a13d7f08b71", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0505" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from scipy.stats import norm, ttest_1samp\n", + "\n", + "muK = 1000\n", + "sigmaK = 10\n", + "rvK = norm(muK, sigmaK)\n", + "\n", + "count = 0\n", + "for j in range(0, 10000):\n", + " sample = rvK.rvs(20)\n", + " res = ttest_1samp(sample, popmean=muK)\n", + " if res.pvalue < 0.05:\n", + " count = count + 1\n", + "\n", + "count / 10000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28424dd6-5f88-4430-b687-b4b9c62ae0c7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "46c80055-9d14-4a4a-81bd-580d486ff2c8", + "metadata": {}, + "source": [ + "#### Verifying confidence intervals" + ] + }, + { + "cell_type": "code", + "execution_count": 16,
+ "id": "6bfd78ff-0981-48c4-9b9c-4a51b61b57b6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9049" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "np.random.seed(10)\n", + "\n", + "muK = 1000\n", + "sigmaK = 10\n", + "rvK = norm(muK, sigmaK)\n", + "\n", + "count = 0\n", + "for j in range(0, 10000):\n", + " sample = rvK.rvs(20)\n", + " res = ttest_1samp(sample, popmean=1000)\n", + " ci = res.confidence_interval(confidence_level=0.90)\n", + " if ci.low <= muK <= ci.high:\n", + " count = count + 1\n", + "\n", + "count / 10000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4230b1f6-e1c3-4958-86e4-0b610cd248f8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "258a6595-ca59-43a9-a8ab-d7e491c4249b", + "metadata": {}, + "source": [ + "### Resampling methods\n", + "\n", + "Clever techniques that reuse data from the observed sample to simulate the variability in the population.\n" + ] + }, + { + "cell_type": "markdown", + "id": "dbca2f75-72fe-4bdc-a36d-85a8e7bcfefd", + "metadata": {}, + "source": [ + "#### Bootstrap estimation" + ] + }, + { + "cell_type": "markdown", + "id": "ee95e6dd-b68e-47b3-9c1f-eab5c1307844", + "metadata": {}, + "source": [ + "Generate 5000 bootstrap samples (sampling with replacement) from the sample `pricesW`.\n", + "Use the bootstrap samples to approximate the sampling distribution of the mean."
import pandas as pd

# Read the `epriceswide` dataset from its URL and keep the "West" column,
# which is the sample used for the bootstrap below.
DATA_URL = "https://nobsstats.com/datasets/epriceswide.csv"
epriceswide = pd.read_csv(DATA_URL)
pricesW = epriceswide["West"]
# Draw 5000 bootstrap samples from `pricesW` (same size as the original
# sample, sampling with replacement) and record the mean of each one.
n = len(pricesW)
xbars_boot = [
    mean(np.random.choice(pricesW, n, replace=True))
    for _ in range(0, 5000)
]

# Histogram of the bootstrap means
sns.histplot(xbars_boot)

fig = plt.gcf()
savefig(fig, "figures/bootstrap_dist_mean_epricesW.png")
np.random.seed(42)

# Pool the two samples once, outside the loop: the pooled values and the
# size of the first group are the same for every permutation.
allprices = np.concatenate((pricesW, pricesE))
nW = len(pricesW)

pdhats = []
for i in range(0, 10000):
    # shuffle the pooled prices and split them into two groups of the original sizes
    pallprices = np.random.permutation(allprices)
    psampleW = pallprices[0:nW]
    psampleE = pallprices[nW:]
    pdhat = dmeans(psampleW, psampleE)
    pdhats.append(pdhat)
# Two-sided p-value: the proportion of permuted differences that are equal to
# or more extreme than the observed difference `dprice`. Magnitudes are
# compared on both sides so the result is also correct when `dprice` is negative.
tails = [d for d in pdhats if abs(d) >= abs(dprice)]
pvalue = len(tails) / len(pdhats)
pvalue
# Use one common set of bin edges for both histograms so that the red bars
# for the tails line up exactly with the bars of the full distribution.
bins = np.histogram_bin_edges(pdhats, bins=100)

# plot the sampling distribution in blue
ax = sns.histplot(pdhats, bins=bins)

# plot red line for the observed statistic
ax.axvline(dprice, color="red")

# plot the values that are equal or more extreme in red
sns.histplot(tails, ax=ax, bins=bins, color="red")
# raw string: the label contains a LaTeX backslash command
_ = ax.set_ylabel(r"$f_{\widehat{D}_0}$")

savefig(plt.gcf(), "figures/pvalue_viz_permutation_test_eprices.png")
def gen_sampling_dist(rv, estfunc, n, N=10000):
    """
    Approximate the sampling distribution of the estimator `estfunc`:
    draw `N` samples of size `n` from the random variable `rv` (using
    `rv.rvs(n)`), apply `estfunc` to each sample, and return the list
    of the `N` resulting estimates.
    """
    return [estfunc(rv.rvs(n)) for _ in range(0, N)]
def permutation_test_dmeans(xsample, ysample, P=10000):
    """
    Compute the two-sided p-value of the observed difference between means
    `dmeans(xsample,ysample)` under the null hypothesis where
    the group membership is randomized.

    The sampling distribution under the null hypothesis is approximated from
    `P` random permutations of the pooled observations, each split into two
    groups with the same sizes as `xsample` and `ysample`. The function
    returns the proportion of permuted differences that are equal to or more
    extreme (in absolute value) than the observed difference.
    """
    # 1. Compute the observed difference between means
    obsdhat = dmeans(xsample, ysample)

    # 2. Get sampling dist. of `dmeans` under H0
    #    Pool the samples passed in as arguments (not notebook-level variables)
    #    so the function works for any two groups.
    allvalues = np.concatenate((xsample, ysample))
    nx = len(xsample)
    pdhats = []
    for i in range(0, P):
        pallvalues = np.random.permutation(allvalues)
        psamplex = pallvalues[0:nx]
        psampley = pallvalues[nx:]
        pdhat = dmeans(psamplex, psampley)
        pdhats.append(pdhat)

    # 3. Compute the p-value
    #    Compare magnitudes on both sides so that a negative observed
    #    difference is handled correctly.
    tails = [d for d in pdhats if abs(d) >= abs(obsdhat)]
    pvalue = len(tails) / len(pdhats)
    return pvalue
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f812d89a-d782-4879-a7f0-51b587ca2a51", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "4ebe1028-d088-41fb-92d8-0518649e9fd5", + "metadata": {}, + "source": [ + "## Links\n", + "\n", + "- Book website [noBSstats.com](https://nobsstats.com/intro.html): contains all the notebooks, demos, and visualizations from the book.\n", + "- [Detailed book outline](https://docs.google.com/document/d/1fwep23-95U-w1QMPU31nOvUnUXE2X3s_Dbk5JuLlKAY/edit): continuously updated list of the topics that are covered in each section.\n", + "- [Python tutorial](https://nobsstats.com/tutorials/python_tutorial.html)\n", + "- [Pandas tutorial](https://nobsstats.com/tutorials/pandas_tutorial.html)\n", + "- [Seaborn tutorial](https://nobsstats.com/tutorials/seaborn_tutorial.html)\n", + "- Previous blog posts:\n", + " - [Outline of the stats curriculum research](https://minireference.com/blog/fixing-the-introductory-statistics-curriculum/)\n", + " - [Book proposal](https://minireference.com/blog/no-bullshit-guide-to-statistics-progress-update/)\n", + " - [Stats survey results](https://minireference.com/blog/what-stats-do-people-want-to-learn/)\n", + "- [There's Only One Test](https://www.youtube.com/watch?v=S41zQEshs5k) talk by Allen B. Downey\n", + "- [Statistics for Hackers](https://www.youtube.com/watch?v=Iq9DzN6mvYA) talk by Jake Vanderplas\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}