From 206a3d876f79e137af88b85ba98aff171e8d8e06 Mon Sep 17 00:00:00 2001 From: caf3676 Date: Fri, 4 Mar 2022 16:08:46 -0600 Subject: [PATCH] Update Interactive_Hypothesis_Testing.ipynb --- Interactive_Hypothesis_Testing.ipynb | 144 ++++++++++++++------------- 1 file changed, 73 insertions(+), 71 deletions(-) diff --git a/Interactive_Hypothesis_Testing.ipynb b/Interactive_Hypothesis_Testing.ipynb index 21d100e..e98ab7c 100644 --- a/Interactive_Hypothesis_Testing.ipynb +++ b/Interactive_Hypothesis_Testing.ipynb @@ -229,80 +229,82 @@ "alpha = widgets.FloatSlider(min=0, max = 50, value = 3, step = 1.0, description = '$α$',orientation='horizontal',layout=Layout(width='300px', height='30px'),continuous_update=False)\n", "alpha.style.handle_color = 'gray'\n", "\n", - "ui3 = widgets.VBox([L,alpha],) # basic widget formatting \n", + "ui3 = widgets.VBox([L,alpha],) # basic widget formatting \n", "\n", "ui4 = widgets.HBox([ui1,ui2,ui3],) # basic widget formatting \n", "\n", "ui2 = widgets.VBox([l,ui4],)\n", "\n", - "def f_make(n1, m1, s1, n2, m2, s2, L, alpha): # function to take parameters, make sample and plot\n", + "def f_make(n1, m1, s1, n2, m2, s2, L, alpha): # function to take parameters, make sample and plot\n", + " #this will prevent a ZeroDivisionError when the number of either sample is 1. 
\n", + " try: \n", + " np.random.seed(73073)\n", + " x1 = np.random.normal(loc=m1,scale=s1,size=n1)\n", + " np.random.seed(73074)\n", + " x2 = np.random.normal(loc=m2,scale=s2,size=n2)\n", "\n", - " \n", - " np.random.seed(73073)\n", - " x1 = np.random.normal(loc=m1,scale=s1,size=n1)\n", - " np.random.seed(73074)\n", - " x2 = np.random.normal(loc=m2,scale=s2,size=n2)\n", - " \n", - " mu = (s2*s2)/(s1*s1)\n", - " nu = ((1/n1 + mu/n2)*(1/n1 + mu/n2))/(1/(n1*n1*(n1-1)) + ((mu*mu)/(n2*n2*(n2-1))))\n", - " \n", - " prop_values = np.linspace(-8.0,8.0,100)\n", - " analytical_distribution = stats.t.pdf(prop_values,df = nu) \n", - " analytical_tcrit = stats.t.ppf(1.0-alpha*0.005,df = nu)\n", - " \n", - " # Analytical Method with SciPy\n", - " t_stat_observed, p_value_analytical = stats.ttest_ind(x1,x2,equal_var=False)\n", - " \n", - " # Bootstrap Method\n", - " global_average = np.average(np.concatenate([x1,x2])) # shift the means to be equal to the globla mean\n", - " x1s = x1 - np.average(x1) + global_average\n", - " x2s = x2 - np.average(x2) + global_average\n", - " \n", - " t_stat = np.zeros(L); p_value = np.zeros(L)\n", - " \n", - " random.seed(73075)\n", - " for l in range(0, L): # loop over realizations\n", - " samples1 = random.choices(x1s, weights=None, cum_weights=None, k=len(x1s))\n", - " #print(samples1)\n", - " samples2 = random.choices(x2s, weights=None, cum_weights=None, k=len(x2s))\n", - " #print(samples2)\n", - " t_stat[l], p_value[l] = stats.ttest_ind(samples1,samples2,equal_var=False)\n", - " \n", - " bootstrap_lower = np.percentile(t_stat,alpha * 0.5)\n", - " bootstrap_upper = np.percentile(t_stat,100.0 - alpha * 0.5)\n", - " \n", - " plt.subplot(121)\n", - " #print(t_stat)\n", - " \n", - " plt.hist(x1,cumulative = False, density = True, alpha=0.4,color=\"red\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_1$')\n", - " plt.hist(x2,cumulative = False, density = True, alpha=0.4,color=\"yellow\",edgecolor=\"black\", bins = 
np.linspace(0,50,50), label = '$x_2$')\n", - " plt.ylim([0,0.4]); plt.xlim([0.0,30.0])\n", - " plt.title('Sample Distributions'); plt.xlabel('Value'); plt.ylabel('Density')\n", - " plt.legend()\n", - " \n", - " #plt.hist(x2)\n", - " \n", - " plt.subplot(122)\n", - " plt.ylim([0,0.6]); plt.xlim([-8.0,8.0])\n", - " plt.title('Bootstrap and Analytical $t_{statistic}$ Sampling Distributions'); plt.xlabel('$t_{statistic}$'); plt.ylabel('Density')\n", - " plt.plot([t_stat_observed,t_stat_observed],[0.0,0.6],color = 'black',label='observed $t_{statistic}$')\n", - " plt.plot([bootstrap_lower,bootstrap_lower],[0.0,0.6],color = 'blue',linestyle='dashed',label = 'bootstrap interval')\n", - " plt.plot([bootstrap_upper,bootstrap_upper],[0.0,0.6],color = 'blue',linestyle='dashed')\n", - " plt.plot(prop_values,analytical_distribution, color = 'red',label='analytical $t_{statistic}$')\n", - " plt.hist(t_stat,cumulative = False, density = True, alpha=0.2,color=\"blue\",edgecolor=\"black\", bins = np.linspace(-8.0,8.0,50), label = 'bootstrap $t_{statistic}$')\n", - "\n", - " plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values <= -1*analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n", - " plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values >= analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n", - " ax = plt.gca()\n", - " handles,labels = ax.get_legend_handles_labels()\n", - " handles = [handles[0], handles[2], handles[3], handles[1]]\n", - " labels = [labels[0], labels[2], labels[3], labels[1]]\n", - "\n", - " plt.legend(handles,labels,loc=1)\n", - " \n", - " \n", - " plt.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.2, wspace=0.2, hspace=0.2)\n", - " plt.show()\n", + " mu = (s2*s2)/(s1*s1)\n", + " nu = ((1/n1 + mu/n2)*(1/n1 + mu/n2))/(1/(n1*n1*(n1-1)) + ((mu*mu)/(n2*n2*(n2-1))))\n", + "\n", + " prop_values = np.linspace(-8.0,8.0,100)\n", + " analytical_distribution = 
stats.t.pdf(prop_values,df = nu) \n", + " analytical_tcrit = stats.t.ppf(1.0-alpha*0.005,df = nu)\n", + "\n", + " # Analytical Method with SciPy\n", + " t_stat_observed, p_value_analytical = stats.ttest_ind(x1,x2,equal_var=False)\n", + "\n", + " # Bootstrap Method\n", + " global_average = np.average(np.concatenate([x1,x2])) # shift the means to be equal to the global mean\n", + " x1s = x1 - np.average(x1) + global_average\n", + " x2s = x2 - np.average(x2) + global_average\n", + "\n", + " t_stat = np.zeros(L); p_value = np.zeros(L)\n", + "\n", + " random.seed(73075)\n", + " for l in range(0, L): # loop over realizations\n", + " samples1 = random.choices(x1s, weights=None, cum_weights=None, k=len(x1s))\n", + " #print(samples1)\n", + " samples2 = random.choices(x2s, weights=None, cum_weights=None, k=len(x2s))\n", + " #print(samples2)\n", + " t_stat[l], p_value[l] = stats.ttest_ind(samples1,samples2,equal_var=False)\n", + "\n", + " bootstrap_lower = np.percentile(t_stat,alpha * 0.5)\n", + " bootstrap_upper = np.percentile(t_stat,100.0 - alpha * 0.5)\n", + "\n", + " plt.subplot(121)\n", + " #print(t_stat)\n", + "\n", + " plt.hist(x1,cumulative = False, density = True, alpha=0.4,color=\"red\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_1$')\n", + " plt.hist(x2,cumulative = False, density = True, alpha=0.4,color=\"yellow\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_2$')\n", + " plt.ylim([0,0.4]); plt.xlim([0.0,30.0])\n", + " plt.title('Sample Distributions'); plt.xlabel('Value'); plt.ylabel('Density')\n", + " plt.legend()\n", + "\n", + " #plt.hist(x2)\n", + "\n", + " plt.subplot(122)\n", + " plt.ylim([0,0.6]); plt.xlim([-8.0,8.0])\n", + " plt.title('Bootstrap and Analytical $t_{statistic}$ Sampling Distributions'); plt.xlabel('$t_{statistic}$'); plt.ylabel('Density')\n", + " plt.plot([t_stat_observed,t_stat_observed],[0.0,0.6],color = 'black',label='observed $t_{statistic}$')\n", + " 
plt.plot([bootstrap_lower,bootstrap_lower],[0.0,0.6],color = 'blue',linestyle='dashed',label = 'bootstrap interval')\n", + " plt.plot([bootstrap_upper,bootstrap_upper],[0.0,0.6],color = 'blue',linestyle='dashed')\n", + " plt.plot(prop_values,analytical_distribution, color = 'red',label='analytical $t_{statistic}$')\n", + " plt.hist(t_stat,cumulative = False, density = True, alpha=0.2,color=\"blue\",edgecolor=\"black\", bins = np.linspace(-8.0,8.0,50), label = 'bootstrap $t_{statistic}$')\n", + "\n", + " plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values <= -1*analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n", + " plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values >= analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n", + " ax = plt.gca()\n", + " handles,labels = ax.get_legend_handles_labels()\n", + " handles = [handles[0], handles[2], handles[3], handles[1]]\n", + " labels = [labels[0], labels[2], labels[3], labels[1]]\n", + "\n", + " plt.legend(handles,labels,loc=1)\n", + "\n", + "\n", + " plt.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.2, wspace=0.2, hspace=0.2)\n", + " plt.show()\n", + " except ZeroDivisionError:\n", + " print(\"Oops! Recall that you must have more than 1 sample of n1 and n2.\")\n", "\n", "\n", "# connect the function to make the samples and plot to the widgets \n", @@ -342,7 +344,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ce2cd3c3c791421b9208affa30d477f8", + "model_id": "bfe242ef0b4f4550afddc52c8fba9394", "version_major": 2, "version_minor": 0 }, @@ -356,12 +358,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "46b167d156c74ca5a69dcfca6cb2f270", + "model_id": "5e3b7fc88ca242be9637f233298bdfd0", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Output()" + "Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '
', 'i…" ] }, "metadata": {},