Skip to content

Commit

Permalink
First pass on cleanup of Python tutorial
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanistheone committed Apr 2, 2024
1 parent 16472d7 commit 642eda9
Show file tree
Hide file tree
Showing 6 changed files with 588 additions and 523 deletions.
1 change: 1 addition & 0 deletions _config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ html:
# BinderHub to make things interactive
launch_buttons:
binderhub_url: "https://mybinder.org"
notebook_interface: "jupyterlab"
thebe: true


Expand Down
8 changes: 4 additions & 4 deletions notebooks/42_multiple_linear_regression.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"weed intercept= 48.66738501700137 slope= -1.0215516597164398\n"
"weed intercept= 48.66738501700138 slope= -1.0215516597164398\n"
]
},
{
Expand Down Expand Up @@ -589,7 +589,7 @@
"output_type": "stream",
"text": [
"alc intercept= 69.33837903371315 slope= -1.8001013152459384\n",
"weed intercept= 48.66738501700137 slope= -1.0215516597164398\n",
"weed intercept= 48.66738501700138 slope= -1.0215516597164398\n",
"exrc intercept= 38.49841859100912 slope= 1.768288756457561\n",
"Saved figure to figures/lm/multiple/prediction_score_vs_alc_weed_exrc.pdf\n",
"Saved figure to figures/lm/multiple/prediction_score_vs_alc_weed_exrc.png\n"
Expand Down Expand Up @@ -838,10 +838,10 @@
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 270.3</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Thu, 28 Mar 2024</td> <th> Prob (F-statistic):</th> <td>1.05e-60</td>\n",
" <th>Date:</th> <td>Mon, 01 Apr 2024</td> <th> Prob (F-statistic):</th> <td>1.05e-60</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>12:56:03</td> <th> Log-Likelihood: </th> <td> -547.63</td>\n",
" <th>Time:</th> <td>07:45:01</td> <th> Log-Likelihood: </th> <td> -547.63</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 156</td> <th> AIC: </th> <td> 1103.</td>\n",
Expand Down
35 changes: 19 additions & 16 deletions notebooks/43_interpreting_linear_models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@
"id": "f853a5c7-3a3b-4008-87c9-0deab0233ba9",
"metadata": {},
"source": [
"## Definitions"
"## Introduction"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 59,
"id": "709070d1-324d-402f-a5cf-50134df5d2b7",
"metadata": {},
"outputs": [
Expand All @@ -124,10 +124,10 @@
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 270.3</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Thu, 28 Mar 2024</td> <th> Prob (F-statistic):</th> <td>1.05e-60</td>\n",
" <th>Date:</th> <td>Sun, 31 Mar 2024</td> <th> Prob (F-statistic):</th> <td>1.05e-60</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>15:25:12</td> <th> Log-Likelihood: </th> <td> -547.63</td>\n",
" <th>Time:</th> <td>09:52:13</td> <th> Log-Likelihood: </th> <td> -547.63</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 156</td> <th> AIC: </th> <td> 1103.</td>\n",
Expand Down Expand Up @@ -181,8 +181,8 @@
"\\textbf{Dep. Variable:} & score & \\textbf{ R-squared: } & 0.842 \\\\\n",
"\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.839 \\\\\n",
"\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 270.3 \\\\\n",
"\\textbf{Date:} & Thu, 28 Mar 2024 & \\textbf{ Prob (F-statistic):} & 1.05e-60 \\\\\n",
"\\textbf{Time:} & 15:25:12 & \\textbf{ Log-Likelihood: } & -547.63 \\\\\n",
"\\textbf{Date:} & Sun, 31 Mar 2024 & \\textbf{ Prob (F-statistic):} & 1.05e-60 \\\\\n",
"\\textbf{Time:} & 09:52:13 & \\textbf{ Log-Likelihood: } & -547.63 \\\\\n",
"\\textbf{No. Observations:} & 156 & \\textbf{ AIC: } & 1103. \\\\\n",
"\\textbf{Df Residuals:} & 152 & \\textbf{ BIC: } & 1115. \\\\\n",
"\\textbf{Df Model:} & 3 & \\textbf{ } & \\\\\n",
Expand Down Expand Up @@ -219,8 +219,8 @@
"Dep. Variable: score R-squared: 0.842\n",
"Model: OLS Adj. R-squared: 0.839\n",
"Method: Least Squares F-statistic: 270.3\n",
"Date: Thu, 28 Mar 2024 Prob (F-statistic): 1.05e-60\n",
"Time: 15:25:12 Log-Likelihood: -547.63\n",
"Date: Sun, 31 Mar 2024 Prob (F-statistic): 1.05e-60\n",
"Time: 09:52:13 Log-Likelihood: -547.63\n",
"No. Observations: 156 AIC: 1103.\n",
"Df Residuals: 152 BIC: 1115.\n",
"Df Model: 3 \n",
Expand All @@ -244,18 +244,21 @@
"\"\"\""
]
},
"execution_count": 5,
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# load the dataset\n",
"doctors = pd.read_csv(\"../datasets/doctors.csv\")\n",
"n = doctors.shape[0]\n",
"\n",
"lm2 = smf.ols('score ~ 1 + alc + weed + exrc', data=doctors).fit()\n",
"# fit the model\n",
"formula = \"score ~ 1 + alc + weed + exrc\"\n",
"lm2 = smf.ols(formula, data=doctors).fit()\n",
"\n",
"# the coefficients of the best-fit line\n",
"# display the summary table\n",
"lm2.summary()"
]
},
Expand Down Expand Up @@ -1321,13 +1324,13 @@
],
"source": [
"df_slplot = pd.DataFrame(\n",
" {'fitted': lm2.fittedvalues,\n",
" 'sqrt_abs_stand_res': np.sqrt(np.abs(lm2.resid))})\n",
" {\"fitted\": lm2.fittedvalues,\n",
" \"sqrt_abs_stand_res\": np.sqrt(np.abs(lm2.resid))})\n",
"\n",
"ax = sns.regplot(data=df_slplot, x='fitted', y='sqrt_abs_stand_res', lowess=True)\n",
"ax = sns.regplot(data=df_slplot, x=\"fitted\", y=\"sqrt_abs_stand_res\", lowess=True)\n",
"\n",
"ax.set_ylabel(r'$\\sqrt{|standardized residuals|}$')\n",
"ax.set_xlabel('Fitted Values');"
"ax.set_ylabel(r\"$\\sqrt{|standardized residuals|}$\")\n",
"ax.set_xlabel(\"Fitted Values\");"
]
},
{
Expand Down
6 changes: 6 additions & 0 deletions notebooks/Untitled.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
158 changes: 158 additions & 0 deletions tutorials/cut_material.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,164 @@
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "81c16d0a-d730-43ff-ab8a-e3283b746c82",
"metadata": {},
"source": [
"## FROM PY TUTORIAL"
]
},
{
"cell_type": "markdown",
"id": "a4e9450d-c741-4a3d-aa9a-3e4937cf76a4",
"metadata": {},
"source": [
"### Python for data science and statistics\n",
"\n",
"Python is a popular language for data analysis\n",
"because of the numerous functions it provides for data management, \n",
"data visualization, and statistics.\n",
"\n",
"Learning to use these Python functions will \n",
" y\n",
"\n",
"Learning a few basic Python constructs like the `for` loop\n",
"will enable you to simulate probability distributions and experimentally verify how statistics procedures work.\n",
"This is a really big deal!\n",
"It's good to know the statistical formulas and recipes,\n",
"but it's even better when you can run your own simulations and check when the formulas work and when they fail.\n",
"\n",
"Once you learn the basics of Python syntax,\n",
"you'll have access to the best-in-class tools for\n",
"data management (Pandas, see [pandas_tutorial.ipynb](./pandas_tutorial.ipynb)),\n",
"data visualization (Seaborn, see [seaborn_tutorial.ipynb](./seaborn_tutorial.ipynb)),\n",
"and statistics (`scipy` and `statsmodels`).\n",
"\n",
"Don't worry, there won't be any advanced math—just sums, products, exponents, logs, and square roots.\n",
"Nothing fancy, I promise.\n",
"If you've ever created a formula in a spreadsheet,\n",
"then you're familiar with all the operations we'll see.\n",
"In a spreadsheet formula you'd use `SUM(`; in Python we write `sum(`.\n",
"You see, it's nothing fancy.\n",
"\n",
"Yes, there will be a lot of code (paragraphs of Python commands) in this tutorial,\n",
"but you can totally handle this.\n",
"If you ever start to freak out and think \"OMG this is too complicated!\", remember that Python is just a fancy calculator."
]
},
{
"cell_type": "markdown",
"id": "6402c1f2-8752-4076-9894-7599457d0e2b",
"metadata": {},
"source": [
"### Overview of the material in this tutorial\n",
"\n",
"We'll cover all essential topics required to get to know Python, including:\n",
"\n",
"- [Getting started](#Getting-started): installing the JupyterLab Desktop coding environment.\n",
"\n",
"- [Expressions and variables](#Expressions-and-variables): basic building blocks of any program.\n",
"\n",
"- [Getting comfortable with Python](#Getting-comfortable-with-Python): looking around and getting help.\n",
"\n",
"- [Lists and for loops](#Lists-and-for-loops): repeating steps and procedures.\n",
"\n",
"- [Functions](#Functions) are reusable code blocks.\n",
"\n",
"- [Other data structures](#Other-data-structures): sets, tuples, etc. \n",
" - [Boolean variables and conditional statements](#Boolean-variables-and-conditional-statements): conditional code execution.\n",
" - [Dictionaries](#Dictionaries) are a versatile way to store data. \n",
"\n",
"- [Objects and classes](#Objects-and-classes): creating custom objects.\n",
"\n",
"- [Python grammar and syntax](#Python-grammar-and-syntax): review of all the syntax.\n",
"\n",
"- [Python libraries and modules](#Python-libraries-and-modules): learn why people say Python comes with \"batteries included\".\n",
"\n",
"After you're done with this tutorial, you'll be ready to read the other two:\n",
"- Pandas (see [pandas_tutorial.ipynb](./pandas_tutorial.ipynb))\n",
"- Seaborn (see [seaborn_tutorial.ipynb](./seaborn_tutorial.ipynb)) \n",
"\n",
"It's important for you to try solving the exercises that you'll encounter as you read along. The exercises are a great way to practice what you've been learning.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3288af5c-2671-480a-b7e4-28e369b10a18",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"id": "7a96667a-38eb-4275-9084-7695a5b88844",
"metadata": {},
"outputs": [],
"source": [
"## ALT. display both value and type on the same line (as a tuple)\n",
"# score, type(score)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04701230-d46d-4219-973c-57ffcf99ce31",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "af0d6704-8a21-4ff1-abde-fd49307bd349",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "367f79df-7641-4f9e-9113-7233cd16b8ff",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d2df7ae-b1c1-4e45-a702-a247530ad39b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "9bb07b04-3ac7-49ed-8394-ee40e542262e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f5b2242-eb07-4126-af42-6d45fc0b18d7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "223d9867-b221-46a0-9528-fc0012ded71d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "c3596647-f291-4290-bff2-d8716c1f5e8d",
Expand Down
Loading

0 comments on commit 642eda9

Please sign in to comment.