From 206a3d876f79e137af88b85ba98aff171e8d8e06 Mon Sep 17 00:00:00 2001
From: caf3676 <carlos.figueroadiaz@austin.utexas.edu>
Date: Fri, 4 Mar 2022 16:08:46 -0600
Subject: [PATCH] Update Interactive_Hypothesis_Testing.ipynb

---
 Interactive_Hypothesis_Testing.ipynb | 144 ++++++++++++++-------------
 1 file changed, 73 insertions(+), 71 deletions(-)

diff --git a/Interactive_Hypothesis_Testing.ipynb b/Interactive_Hypothesis_Testing.ipynb
index 21d100e..e98ab7c 100644
--- a/Interactive_Hypothesis_Testing.ipynb
+++ b/Interactive_Hypothesis_Testing.ipynb
@@ -229,80 +229,82 @@
     "alpha = widgets.FloatSlider(min=0, max = 50, value = 3, step = 1.0, description = '$α$',orientation='horizontal',layout=Layout(width='300px', height='30px'),continuous_update=False)\n",
     "alpha.style.handle_color = 'gray'\n",
     "\n",
-    "ui3 = widgets.VBox([L,alpha],)                                # basic widget formatting \n",
+    "ui3 = widgets.VBox([L,alpha],)                               # basic widget formatting \n",
     "\n",
     "ui4 = widgets.HBox([ui1,ui2,ui3],)                               # basic widget formatting \n",
     "\n",
     "ui2 = widgets.VBox([l,ui4],)\n",
     "\n",
-    "def f_make(n1, m1, s1, n2, m2, s2, L, alpha):                                      # function to take parameters, make sample and plot\n",
+    "def f_make(n1, m1, s1, n2, m2, s2, L, alpha):        # function to take parameters, make sample and plot\n",
+    "    # prevent a ZeroDivisionError in the degrees-of-freedom formula when either sample size (n1 or n2) is 1\n",
+    "    try:       \n",
+    "        np.random.seed(73073)\n",
+    "        x1 = np.random.normal(loc=m1,scale=s1,size=n1)\n",
+    "        np.random.seed(73074)\n",
+    "        x2 = np.random.normal(loc=m2,scale=s2,size=n2)\n",
     "\n",
-    "    \n",
-    "    np.random.seed(73073)\n",
-    "    x1 = np.random.normal(loc=m1,scale=s1,size=n1)\n",
-    "    np.random.seed(73074)\n",
-    "    x2 = np.random.normal(loc=m2,scale=s2,size=n2)\n",
-    "  \n",
-    "    mu = (s2*s2)/(s1*s1)\n",
-    "    nu = ((1/n1 + mu/n2)*(1/n1 + mu/n2))/(1/(n1*n1*(n1-1)) + ((mu*mu)/(n2*n2*(n2-1))))\n",
-    "    \n",
-    "    prop_values = np.linspace(-8.0,8.0,100)\n",
-    "    analytical_distribution = stats.t.pdf(prop_values,df = nu) \n",
-    "    analytical_tcrit = stats.t.ppf(1.0-alpha*0.005,df = nu)\n",
-    "    \n",
-    "    # Analytical Method with SciPy\n",
-    "    t_stat_observed, p_value_analytical = stats.ttest_ind(x1,x2,equal_var=False)\n",
-    "    \n",
-    "    # Bootstrap Method\n",
-    "    global_average = np.average(np.concatenate([x1,x2]))   # shift the means to be equal to the globla mean\n",
-    "    x1s = x1 - np.average(x1) + global_average\n",
-    "    x2s = x2 - np.average(x2) + global_average\n",
-    "    \n",
-    "    t_stat = np.zeros(L); p_value = np.zeros(L)\n",
-    "    \n",
-    "    random.seed(73075)\n",
-    "    for l in range(0, L):                      # loop over realizations\n",
-    "        samples1 = random.choices(x1s, weights=None, cum_weights=None, k=len(x1s))\n",
-    "        #print(samples1)\n",
-    "        samples2 = random.choices(x2s, weights=None, cum_weights=None, k=len(x2s))\n",
-    "        #print(samples2)\n",
-    "        t_stat[l], p_value[l] = stats.ttest_ind(samples1,samples2,equal_var=False)\n",
-    "      \n",
-    "    bootstrap_lower = np.percentile(t_stat,alpha * 0.5)\n",
-    "    bootstrap_upper = np.percentile(t_stat,100.0 - alpha * 0.5)\n",
-    "    \n",
-    "    plt.subplot(121)\n",
-    "    #print(t_stat)\n",
-    "   \n",
-    "    plt.hist(x1,cumulative = False, density = True, alpha=0.4,color=\"red\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_1$')\n",
-    "    plt.hist(x2,cumulative = False, density = True, alpha=0.4,color=\"yellow\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_2$')\n",
-    "    plt.ylim([0,0.4]); plt.xlim([0.0,30.0])\n",
-    "    plt.title('Sample Distributions'); plt.xlabel('Value'); plt.ylabel('Density')\n",
-    "    plt.legend()\n",
-    "    \n",
-    "    #plt.hist(x2)\n",
-    "    \n",
-    "    plt.subplot(122)\n",
-    "    plt.ylim([0,0.6]); plt.xlim([-8.0,8.0])\n",
-    "    plt.title('Bootstrap and Analytical $t_{statistic}$ Sampling Distributions'); plt.xlabel('$t_{statistic}$'); plt.ylabel('Density')\n",
-    "    plt.plot([t_stat_observed,t_stat_observed],[0.0,0.6],color = 'black',label='observed $t_{statistic}$')\n",
-    "    plt.plot([bootstrap_lower,bootstrap_lower],[0.0,0.6],color = 'blue',linestyle='dashed',label = 'bootstrap interval')\n",
-    "    plt.plot([bootstrap_upper,bootstrap_upper],[0.0,0.6],color = 'blue',linestyle='dashed')\n",
-    "    plt.plot(prop_values,analytical_distribution, color = 'red',label='analytical $t_{statistic}$')\n",
-    "    plt.hist(t_stat,cumulative = False, density = True, alpha=0.2,color=\"blue\",edgecolor=\"black\", bins = np.linspace(-8.0,8.0,50), label = 'bootstrap $t_{statistic}$')\n",
-    "\n",
-    "    plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values <= -1*analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n",
-    "    plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values >= analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n",
-    "    ax = plt.gca()\n",
-    "    handles,labels = ax.get_legend_handles_labels()\n",
-    "    handles = [handles[0], handles[2], handles[3], handles[1]]\n",
-    "    labels = [labels[0], labels[2], labels[3], labels[1]]\n",
-    "\n",
-    "    plt.legend(handles,labels,loc=1)\n",
-    "    \n",
-    "    \n",
-    "    plt.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.2, wspace=0.2, hspace=0.2)\n",
-    "    plt.show()\n",
+    "        mu = (s2*s2)/(s1*s1)\n",
+    "        nu = ((1/n1 + mu/n2)*(1/n1 + mu/n2))/(1/(n1*n1*(n1-1)) + ((mu*mu)/(n2*n2*(n2-1))))\n",
+    "\n",
+    "        prop_values = np.linspace(-8.0,8.0,100)\n",
+    "        analytical_distribution = stats.t.pdf(prop_values,df = nu) \n",
+    "        analytical_tcrit = stats.t.ppf(1.0-alpha*0.005,df = nu)\n",
+    "\n",
+    "        # Analytical Method with SciPy\n",
+    "        t_stat_observed, p_value_analytical = stats.ttest_ind(x1,x2,equal_var=False)\n",
+    "\n",
+    "        # Bootstrap Method\n",
+    "        global_average = np.average(np.concatenate([x1,x2]))   # shift the means to be equal to the global mean\n",
+    "        x1s = x1 - np.average(x1) + global_average\n",
+    "        x2s = x2 - np.average(x2) + global_average\n",
+    "\n",
+    "        t_stat = np.zeros(L); p_value = np.zeros(L)\n",
+    "\n",
+    "        random.seed(73075)\n",
+    "        for l in range(0, L):                      # loop over realizations\n",
+    "            samples1 = random.choices(x1s, weights=None, cum_weights=None, k=len(x1s))\n",
+    "            #print(samples1)\n",
+    "            samples2 = random.choices(x2s, weights=None, cum_weights=None, k=len(x2s))\n",
+    "            #print(samples2)\n",
+    "            t_stat[l], p_value[l] = stats.ttest_ind(samples1,samples2,equal_var=False)\n",
+    "\n",
+    "        bootstrap_lower = np.percentile(t_stat,alpha * 0.5)\n",
+    "        bootstrap_upper = np.percentile(t_stat,100.0 - alpha * 0.5)\n",
+    "\n",
+    "        plt.subplot(121)\n",
+    "        #print(t_stat)\n",
+    "\n",
+    "        plt.hist(x1,cumulative = False, density = True, alpha=0.4,color=\"red\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_1$')\n",
+    "        plt.hist(x2,cumulative = False, density = True, alpha=0.4,color=\"yellow\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_2$')\n",
+    "        plt.ylim([0,0.4]); plt.xlim([0.0,30.0])\n",
+    "        plt.title('Sample Distributions'); plt.xlabel('Value'); plt.ylabel('Density')\n",
+    "        plt.legend()\n",
+    "\n",
+    "        #plt.hist(x2)\n",
+    "\n",
+    "        plt.subplot(122)\n",
+    "        plt.ylim([0,0.6]); plt.xlim([-8.0,8.0])\n",
+    "        plt.title('Bootstrap and Analytical $t_{statistic}$ Sampling Distributions'); plt.xlabel('$t_{statistic}$'); plt.ylabel('Density')\n",
+    "        plt.plot([t_stat_observed,t_stat_observed],[0.0,0.6],color = 'black',label='observed $t_{statistic}$')\n",
+    "        plt.plot([bootstrap_lower,bootstrap_lower],[0.0,0.6],color = 'blue',linestyle='dashed',label = 'bootstrap interval')\n",
+    "        plt.plot([bootstrap_upper,bootstrap_upper],[0.0,0.6],color = 'blue',linestyle='dashed')\n",
+    "        plt.plot(prop_values,analytical_distribution, color = 'red',label='analytical $t_{statistic}$')\n",
+    "        plt.hist(t_stat,cumulative = False, density = True, alpha=0.2,color=\"blue\",edgecolor=\"black\", bins = np.linspace(-8.0,8.0,50), label = 'bootstrap $t_{statistic}$')\n",
+    "\n",
+    "        plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values <= -1*analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n",
+    "        plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values >= analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n",
+    "        ax = plt.gca()\n",
+    "        handles,labels = ax.get_legend_handles_labels()\n",
+    "        handles = [handles[0], handles[2], handles[3], handles[1]]\n",
+    "        labels = [labels[0], labels[2], labels[3], labels[1]]\n",
+    "\n",
+    "        plt.legend(handles,labels,loc=1)\n",
+    "\n",
+    "\n",
+    "        plt.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.2, wspace=0.2, hspace=0.2)\n",
+    "        plt.show()\n",
+    "    except ZeroDivisionError:\n",
+    "        print(\"Oops! Recall that you must have more than 1 sample of n1 and n2.\")\n",
     "\n",
     "\n",
     "# connect the function to make the samples and plot to the widgets    \n",
@@ -342,7 +344,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ce2cd3c3c791421b9208affa30d477f8",
+       "model_id": "bfe242ef0b4f4550afddc52c8fba9394",
        "version_major": 2,
        "version_minor": 0
       },
@@ -356,12 +358,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "46b167d156c74ca5a69dcfca6cb2f270",
+       "model_id": "5e3b7fc88ca242be9637f233298bdfd0",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "Output()"
+       "Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '<Figure size 432x288 with 2 Axes>', 'i…"
       ]
      },
      "metadata": {},