Moved examples 1BT and 2BT to new ch3 problems nb

minireference · Jul 27, 2023 · 47989e8 · 47989e8
1 parent ab43e67
commit 47989e8
Show file tree

Hide file tree

Showing 4 changed files with 393 additions and 231 deletions.
diff --git a/notebooks/34_analytical_approx.ipynb b/notebooks/34_analytical_approx.ipynb
@@ -2049,223 +2049,10 @@
     "### Bootstrap estimate of the standard error\n",
     "\n",
     "Another way to obtain the standard error of the mean\n",
-    "(the standard deviation sampling distribution of the mean)\n",
-    "is to use the bootstrap approach.\n",
+    "(the standard deviation of the sampling distribution of the mean)\n",
+    "is to use the bootstrap estimate.\n",
     "\n",
-    "\n",
-    "TODO: import formulas"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b85af6aa-078a-4a02-9f0c-a71871d0985a",
-   "metadata": {},
-   "source": [
-    "#### Example 1BT: test for the mean of Batch 04"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 132,
-   "id": "65005726-e0a2-421c-aada-27842968d6eb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "kombucha = pd.read_csv(\"../datasets/kombucha.csv\")\n",
-    "ksample04 = kombucha[kombucha[\"batch\"]==4][\"volume\"]\n",
-    "n04 = len(ksample04)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 133,
-   "id": "ed62b815-5faf-458e-9849-85cb09a76892",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1.2252842390160474"
-      ]
-     },
-     "execution_count": 133,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# bootstrap estimate for standard error of the mean\n",
-    "from stats_helpers import gen_boot_dist\n",
-    "kbars_boot04 = gen_boot_dist(ksample04, estfunc=mean)\n",
-    "sehat_boot04 = std(kbars_boot04)\n",
-    "sehat_boot04"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 134,
-   "id": "3769ca83-069b-407f-8712-4f3b44fc8fbe",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "3.128661805915673"
-      ]
-     },
-     "execution_count": 134,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# compute the t statistic using bootstrap se\n",
-    "obst04bt = (obsmean04 - muK0) / sehat_boot04\n",
-    "obst04bt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 135,
-   "id": "5316bae0-aae8-4b97-8602-68fe70e7a248",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.012147863549560177"
-      ]
-     },
-     "execution_count": 135,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from scipy.stats import t as tdist\n",
-    "rvT04 = tdist(n04 - 1)\n",
-    "pvalue04bt = tailprobs(rvT, obst04bt, alt=\"two-sided\")\n",
-    "pvalue04bt"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "78df6c04-514c-4515-9ef8-5700de0861e9",
-   "metadata": {},
-   "source": [
-    "The $p$-value is very small,\n",
-    "so our decision is to reject $H_0$."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "275cff66-0f71-46ed-ac3f-4448936397f0",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "id": "660fa68f-0140-4d7d-91f9-47466f4c881c",
-   "metadata": {},
-   "source": [
-    "#### Example 2BT: test for the mean of Batch 01"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 136,
-   "id": "90aa8990-0fa7-4e80-9206-d62c34a3990e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "kombucha = pd.read_csv(\"../datasets/kombucha.csv\")\n",
-    "ksample01 = kombucha[kombucha[\"batch\"]==1][\"volume\"]\n",
-    "n01 = len(ksample01)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 137,
-   "id": "178af06b-310a-43f1-94c1-daa0b9c5ed1f",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1.5501517384893604"
-      ]
-     },
-     "execution_count": 137,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# bootstrap estimate for standard error of the mean\n",
-    "from stats_helpers import gen_boot_dist\n",
-    "kbars_boot01 = gen_boot_dist(ksample01, estfunc=mean)\n",
-    "sehat_boot01 = std(kbars_boot01)\n",
-    "sehat_boot01"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 138,
-   "id": "c02d1766-5a31-4963-9603-62d545742e1e",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "-0.5781692061148894"
-      ]
-     },
-     "execution_count": 138,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# compute the t statistic using bootstrap se\n",
-    "obst01bt = (obsmean01 - muK0) / sehat_boot01\n",
-    "obst01bt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 139,
-   "id": "0a858812-cd15-4b32-adfb-a3d7f4f1ed6e",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.5664736858023267"
-      ]
-     },
-     "execution_count": 139,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from scipy.stats import t as tdist\n",
-    "\n",
-    "rvT01 = tdist(n01-1)\n",
-    "#######################################################\n",
-    "pvalue01bt = tailprobs(rvT01, obst01bt, alt=\"two-sided\")\n",
-    "pvalue01bt"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "50db8103-3aee-430e-bf2a-c1ca6ef1262d",
-   "metadata": {},
-   "source": [
-    "The $p$-value is very large,\n",
-    "so we have no reason to reject $H_0$."
+    "See the Chapter 3 problems notebook for Examples 1BT and 2BT.\n"
    ]
   },
   {

diff --git a/notebooks/35_two_sample_tests.ipynb b/notebooks/35_two_sample_tests.ipynb
@@ -2414,27 +2414,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 55,
    "id": "1e0fc5ae-ae1a-4fdc-98af-5f25d8563e88",
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "TypeError",
-     "evalue": "ttest_dmeans() got an unexpected keyword argument 'equal_var'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[54], line 4\u001b[0m\n\u001b[1;32m      2\u001b[0m scoresD \u001b[38;5;241m=\u001b[39m students[students[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcurriculum\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m==\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdebate\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscore\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m      3\u001b[0m scoresL \u001b[38;5;241m=\u001b[39m students[students[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcurriculum\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m==\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlecture\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscore\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m----> 4\u001b[0m \u001b[43mttest_dmeans\u001b[49m\u001b[43m(\u001b[49m\u001b[43mscoresD\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mscoresL\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mequal_var\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
-      "\u001b[0;31mTypeError\u001b[0m: ttest_dmeans() got an unexpected keyword argument 'equal_var'"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "students = pd.read_csv(\"../datasets/students.csv\")\n",
     "scoresD = students[students[\"curriculum\"]==\"debate\"][\"score\"]\n",
     "scoresL = students[students[\"curriculum\"]==\"lecture\"][\"score\"]\n",
-    "ttest_dmeans(scoresD, scoresL, equal_var=True)"
+    "# ttest_dmeans(scoresD, scoresL, equal_var=True)"
    ]
   },
   {