Skip to content

ZeroDivisionError fix on f_make (Interactive_Hypothesis_Testing.ipynb) #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 73 additions & 71 deletions Interactive_Hypothesis_Testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -229,80 +229,82 @@
"alpha = widgets.FloatSlider(min=0, max = 50, value = 3, step = 1.0, description = '$α$',orientation='horizontal',layout=Layout(width='300px', height='30px'),continuous_update=False)\n",
"alpha.style.handle_color = 'gray'\n",
"\n",
"ui3 = widgets.VBox([L,alpha],) # basic widget formatting \n",
"ui3 = widgets.VBox([L,alpha],) # basic widget formatting \n",
"\n",
"ui4 = widgets.HBox([ui1,ui2,ui3],) # basic widget formatting \n",
"\n",
"ui2 = widgets.VBox([l,ui4],)\n",
"\n",
"def f_make(n1, m1, s1, n2, m2, s2, L, alpha): # function to take parameters, make sample and plot\n",
"def f_make(n1, m1, s1, n2, m2, s2, L, alpha): # function to take parameters, make sample and plot\n",
" #this will prevent a ZeroDivisionError when the number of either sample is 1. \n",
" try: \n",
" np.random.seed(73073)\n",
" x1 = np.random.normal(loc=m1,scale=s1,size=n1)\n",
" np.random.seed(73074)\n",
" x2 = np.random.normal(loc=m2,scale=s2,size=n2)\n",
"\n",
" \n",
" np.random.seed(73073)\n",
" x1 = np.random.normal(loc=m1,scale=s1,size=n1)\n",
" np.random.seed(73074)\n",
" x2 = np.random.normal(loc=m2,scale=s2,size=n2)\n",
" \n",
" mu = (s2*s2)/(s1*s1)\n",
" nu = ((1/n1 + mu/n2)*(1/n1 + mu/n2))/(1/(n1*n1*(n1-1)) + ((mu*mu)/(n2*n2*(n2-1))))\n",
" \n",
" prop_values = np.linspace(-8.0,8.0,100)\n",
" analytical_distribution = stats.t.pdf(prop_values,df = nu) \n",
" analytical_tcrit = stats.t.ppf(1.0-alpha*0.005,df = nu)\n",
" \n",
" # Analytical Method with SciPy\n",
" t_stat_observed, p_value_analytical = stats.ttest_ind(x1,x2,equal_var=False)\n",
" \n",
" # Bootstrap Method\n",
" global_average = np.average(np.concatenate([x1,x2])) # shift the means to be equal to the globla mean\n",
" x1s = x1 - np.average(x1) + global_average\n",
" x2s = x2 - np.average(x2) + global_average\n",
" \n",
" t_stat = np.zeros(L); p_value = np.zeros(L)\n",
" \n",
" random.seed(73075)\n",
" for l in range(0, L): # loop over realizations\n",
" samples1 = random.choices(x1s, weights=None, cum_weights=None, k=len(x1s))\n",
" #print(samples1)\n",
" samples2 = random.choices(x2s, weights=None, cum_weights=None, k=len(x2s))\n",
" #print(samples2)\n",
" t_stat[l], p_value[l] = stats.ttest_ind(samples1,samples2,equal_var=False)\n",
" \n",
" bootstrap_lower = np.percentile(t_stat,alpha * 0.5)\n",
" bootstrap_upper = np.percentile(t_stat,100.0 - alpha * 0.5)\n",
" \n",
" plt.subplot(121)\n",
" #print(t_stat)\n",
" \n",
" plt.hist(x1,cumulative = False, density = True, alpha=0.4,color=\"red\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_1$')\n",
" plt.hist(x2,cumulative = False, density = True, alpha=0.4,color=\"yellow\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_2$')\n",
" plt.ylim([0,0.4]); plt.xlim([0.0,30.0])\n",
" plt.title('Sample Distributions'); plt.xlabel('Value'); plt.ylabel('Density')\n",
" plt.legend()\n",
" \n",
" #plt.hist(x2)\n",
" \n",
" plt.subplot(122)\n",
" plt.ylim([0,0.6]); plt.xlim([-8.0,8.0])\n",
" plt.title('Bootstrap and Analytical $t_{statistic}$ Sampling Distributions'); plt.xlabel('$t_{statistic}$'); plt.ylabel('Density')\n",
" plt.plot([t_stat_observed,t_stat_observed],[0.0,0.6],color = 'black',label='observed $t_{statistic}$')\n",
" plt.plot([bootstrap_lower,bootstrap_lower],[0.0,0.6],color = 'blue',linestyle='dashed',label = 'bootstrap interval')\n",
" plt.plot([bootstrap_upper,bootstrap_upper],[0.0,0.6],color = 'blue',linestyle='dashed')\n",
" plt.plot(prop_values,analytical_distribution, color = 'red',label='analytical $t_{statistic}$')\n",
" plt.hist(t_stat,cumulative = False, density = True, alpha=0.2,color=\"blue\",edgecolor=\"black\", bins = np.linspace(-8.0,8.0,50), label = 'bootstrap $t_{statistic}$')\n",
"\n",
" plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values <= -1*analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n",
" plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values >= analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n",
" ax = plt.gca()\n",
" handles,labels = ax.get_legend_handles_labels()\n",
" handles = [handles[0], handles[2], handles[3], handles[1]]\n",
" labels = [labels[0], labels[2], labels[3], labels[1]]\n",
"\n",
" plt.legend(handles,labels,loc=1)\n",
" \n",
" \n",
" plt.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.2, wspace=0.2, hspace=0.2)\n",
" plt.show()\n",
" mu = (s2*s2)/(s1*s1)\n",
" nu = ((1/n1 + mu/n2)*(1/n1 + mu/n2))/(1/(n1*n1*(n1-1)) + ((mu*mu)/(n2*n2*(n2-1))))\n",
"\n",
" prop_values = np.linspace(-8.0,8.0,100)\n",
" analytical_distribution = stats.t.pdf(prop_values,df = nu) \n",
" analytical_tcrit = stats.t.ppf(1.0-alpha*0.005,df = nu)\n",
"\n",
" # Analytical Method with SciPy\n",
" t_stat_observed, p_value_analytical = stats.ttest_ind(x1,x2,equal_var=False)\n",
"\n",
" # Bootstrap Method\n",
" global_average = np.average(np.concatenate([x1,x2])) # shift the means to be equal to the globla mean\n",
" x1s = x1 - np.average(x1) + global_average\n",
" x2s = x2 - np.average(x2) + global_average\n",
"\n",
" t_stat = np.zeros(L); p_value = np.zeros(L)\n",
"\n",
" random.seed(73075)\n",
" for l in range(0, L): # loop over realizations\n",
" samples1 = random.choices(x1s, weights=None, cum_weights=None, k=len(x1s))\n",
" #print(samples1)\n",
" samples2 = random.choices(x2s, weights=None, cum_weights=None, k=len(x2s))\n",
" #print(samples2)\n",
" t_stat[l], p_value[l] = stats.ttest_ind(samples1,samples2,equal_var=False)\n",
"\n",
" bootstrap_lower = np.percentile(t_stat,alpha * 0.5)\n",
" bootstrap_upper = np.percentile(t_stat,100.0 - alpha * 0.5)\n",
"\n",
" plt.subplot(121)\n",
" #print(t_stat)\n",
"\n",
" plt.hist(x1,cumulative = False, density = True, alpha=0.4,color=\"red\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_1$')\n",
" plt.hist(x2,cumulative = False, density = True, alpha=0.4,color=\"yellow\",edgecolor=\"black\", bins = np.linspace(0,50,50), label = '$x_2$')\n",
" plt.ylim([0,0.4]); plt.xlim([0.0,30.0])\n",
" plt.title('Sample Distributions'); plt.xlabel('Value'); plt.ylabel('Density')\n",
" plt.legend()\n",
"\n",
" #plt.hist(x2)\n",
"\n",
" plt.subplot(122)\n",
" plt.ylim([0,0.6]); plt.xlim([-8.0,8.0])\n",
" plt.title('Bootstrap and Analytical $t_{statistic}$ Sampling Distributions'); plt.xlabel('$t_{statistic}$'); plt.ylabel('Density')\n",
" plt.plot([t_stat_observed,t_stat_observed],[0.0,0.6],color = 'black',label='observed $t_{statistic}$')\n",
" plt.plot([bootstrap_lower,bootstrap_lower],[0.0,0.6],color = 'blue',linestyle='dashed',label = 'bootstrap interval')\n",
" plt.plot([bootstrap_upper,bootstrap_upper],[0.0,0.6],color = 'blue',linestyle='dashed')\n",
" plt.plot(prop_values,analytical_distribution, color = 'red',label='analytical $t_{statistic}$')\n",
" plt.hist(t_stat,cumulative = False, density = True, alpha=0.2,color=\"blue\",edgecolor=\"black\", bins = np.linspace(-8.0,8.0,50), label = 'bootstrap $t_{statistic}$')\n",
"\n",
" plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values <= -1*analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n",
" plt.fill_between(prop_values, 0, analytical_distribution, where = prop_values >= analytical_tcrit, facecolor='red', interpolate=True, alpha = 0.2)\n",
" ax = plt.gca()\n",
" handles,labels = ax.get_legend_handles_labels()\n",
" handles = [handles[0], handles[2], handles[3], handles[1]]\n",
" labels = [labels[0], labels[2], labels[3], labels[1]]\n",
"\n",
" plt.legend(handles,labels,loc=1)\n",
"\n",
"\n",
" plt.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.2, wspace=0.2, hspace=0.2)\n",
" plt.show()\n",
" except ZeroDivisionError:\n",
" print(\"Oops! Recall that you must have more than 1 sample of n1 and n2.\")\n",
"\n",
"\n",
"# connect the function to make the samples and plot to the widgets \n",
Expand Down Expand Up @@ -342,7 +344,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ce2cd3c3c791421b9208affa30d477f8",
"model_id": "bfe242ef0b4f4550afddc52c8fba9394",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -356,12 +358,12 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "46b167d156c74ca5a69dcfca6cb2f270",
"model_id": "5e3b7fc88ca242be9637f233298bdfd0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
"Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '<Figure size 432x288 with 2 Axes>', 'i…"
]
},
"metadata": {},
Expand Down