diff --git a/.ipynb_checkpoints/titanic_survival-checkpoint.ipynb b/.ipynb_checkpoints/titanic_survival-checkpoint.ipynb new file mode 100644 index 0000000..ea74d15 --- /dev/null +++ b/.ipynb_checkpoints/titanic_survival-checkpoint.ipynb @@ -0,0 +1,1458 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import r2_score\n", + "from pandas.plotting import scatter_matrix\n", + "from sklearn.linear_model import LogisticRegression\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('train.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 66.2+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "age_mean = df.Age.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "df['Age'] = df.Age.fillna(age_mean)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
PassengerId891.0446.000000257.3538421.00223.5000446.000000668.5891.0000
Survived891.00.3838380.4865920.000.00000.0000001.01.0000
Pclass891.02.3086420.8360711.002.00003.0000003.03.0000
Age891.029.69911813.0020150.4222.000029.69911835.080.0000
SibSp891.00.5230081.1027430.000.00000.0000001.08.0000
Parch891.00.3815940.8060570.000.00000.0000000.06.0000
Fare891.032.20420849.6934290.007.910414.45420031.0512.3292
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% 75% \\\n", + "PassengerId 891.0 446.000000 257.353842 1.00 223.5000 446.000000 668.5 \n", + "Survived 891.0 0.383838 0.486592 0.00 0.0000 0.000000 1.0 \n", + "Pclass 891.0 2.308642 0.836071 1.00 2.0000 3.000000 3.0 \n", + "Age 891.0 29.699118 13.002015 0.42 22.0000 29.699118 35.0 \n", + "SibSp 891.0 0.523008 1.102743 0.00 0.0000 0.000000 1.0 \n", + "Parch 891.0 0.381594 0.806057 0.00 0.0000 0.000000 0.0 \n", + "Fare 891.0 32.204208 49.693429 0.00 7.9104 14.454200 31.0 \n", + "\n", + " max \n", + "PassengerId 891.0000 \n", + "Survived 1.0000 \n", + "Pclass 3.0000 \n", + "Age 80.0000 \n", + "SibSp 8.0000 \n", + "Parch 6.0000 \n", + "Fare 512.3292 " + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe().T" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedAge_GroupChildAdultSeniors01
0103Braund, Mr. Owen Harris022.010A/5 211717.2500NaNSAdult01010
1211Cumings, Mrs. John Bradley (Florence Briggs Th...138.010PC 1759971.2833C85CAdult01001
2313Heikkinen, Miss. Laina126.000STON/O2. 31012827.9250NaNSAdult01001
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)135.01011380353.1000C123SAdult01001
4503Allen, Mr. William Henry035.0003734508.0500NaNSAdult01010
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp Parch \\\n", + "0 Braund, Mr. Owen Harris 0 22.0 1 0 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... 1 38.0 1 0 \n", + "2 Heikkinen, Miss. Laina 1 26.0 0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 35.0 1 0 \n", + "4 Allen, Mr. William Henry 0 35.0 0 0 \n", + "\n", + " Ticket Fare Cabin Embarked Age_Group Child Adult Seniors \\\n", + "0 A/5 21171 7.2500 NaN S Adult 0 1 0 \n", + "1 PC 17599 71.2833 C85 C Adult 0 1 0 \n", + "2 STON/O2. 3101282 7.9250 NaN S Adult 0 1 0 \n", + "3 113803 53.1000 C123 S Adult 0 1 0 \n", + "4 373450 8.0500 NaN S Adult 0 1 0 \n", + "\n", + " 0 1 \n", + "0 1 0 \n", + "1 0 1 \n", + "2 0 1 \n", + "3 0 1 \n", + "4 1 0 " + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "age_cat = ['Child','Adult','Seniors']\n", + "grouping = [0,17,65,100]\n", + "df['Age_Group'] = pd.cut(df['Age'], bins=grouping, labels=age_cat)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XuYXVV9//H3h4SbAQUMhkAigyUiiBVDuLSovwhVIV6iv6pFqVzEYjU+QE0rgdofeHsKbREvBRQEAQGBikhEqkRkVFQQgpGAAQ04Qki4REJIwkUTv78/1jrJzuTMzDmZc9mz5/N6nvPM2dfz3bP2+Z61174sRQRmZlZdW3Q7ADMzay8nejOzinOiNzOrOCd6M7OKc6I3M6s4J3ozs4pzojcza4CkXkkf7HYcm8OJfhC5YFdI2rrbsVhrSOqT9Kyk1YXXrt2Oy4anX7k+Julrkrbrdlxl4UQ/AEk9wOuAAN7e1WCs1d4WEdsVXkubWVjSmHYFZsPytojYDpgKHAB8opmFJY1tS1Ql4EQ/sKOB24BLgGNqIyW9WNJ3JD0t6Q5Jn5F0a2H6KyTNk/SkpPslvafzoVszJG0h6ZuSHpX0VD6S27sw/XJJ50r6nqQ1wOskbSPpc5IezjXI8yRt08XNsCwiHgH+F9hX0nGSFklaJelBSR+qzSdpuqQlkk6R9CjwtTx+pqQF+Tv+gKTDC6vfXdJP8/pukjS+s1u3eZzoB3Y0cEV+vVnShDz+XGANsAvpB6D4IzAOmAdcCbwEeC9wnqRXdjBu2zw3AFNI5XoP8PV+098HfBLYHvg58F/AHsBf5uV6gH/tUKw2CEmTgRnAL4HHgbcCLwSOA86RNLUw+y7ATsDuwAmSDgQuA/4F2AF4PdBXmP99eT0vAbYC/rmd29IyEeFXvxfwWuBPwPg8fB/wT8CYPH6vwryfAW7N7/8O+Em/dX0FOL3b2+TX+vLoA1YDT+XXt+vMM57UZDcuD18OXFyYvgXwHLB7YdzrgN92e/tG66tfuf4eOA/Yts583wZOyu+nA38EtilM/wpwzgCf0Qt8ojD8EeB73d72Rl6VbZMapmOAmyJieR6+Mo/7BjAWeLgwb/H97sBBkp4qjBvLprVD6653RMQPagO5zf3fgXeRkvyf86TxpKM32LicdwG2Bn4laf1q2hmwNWSjcgWQdARwOvBy0g/0C4CFhVmeiIjnCsOTgRsH+YxHC++fAUbECV8n+n4kbQu8BxiT2+0gfal3ACYAa4FJwG/ytMmFxR8GfhQRb+xQuNYaR5MO9Q8l1QZfDDzBxsm7+JjXx0g1wb0i4rFOBWnNyVfLXUsq3+sj4k+Svs3A5QrpO/wXHQqxY9xGv6l3AOuAfYD98mtv4CekHeZbwBmSXiDpFXlczQ3AyyW9X9KW+XVA8cSeldL2wPPAH0g1vs8ONnNErAO+Cnxe0s5KJkl6U/tDtSZsRaqkPQGszbX7ocroIuA4SYflk/S75e/5iOZEv6ljgK9FxEMR8WjtBfw3cBTwUeBFpEO4r5Oac54HiIhVpB3pSGBpnucs0s5m5fU1UnktBe4FftbAMrNJtf9fACuBm0gnZa0k8vfxROAaYAXpROrcIZb5BfmkLalcf0Rqkh3RlE8q2GaSdBawS0QcM+TMZmZd4Bp9k/J18n+ZD9cPBI4Hrut2XGZmA/HJ2OZtT2qu2ZV0je7ZwPVdjcjMbBBuujEzqzg33ZiZVVwpmm7Gjx8fPT0964fXrFnDuHHjuhdQG5Rtm+bPn788Inbu1OeVsYyrHoPLeIMyxQKti6fhMu72rbkRwf777x9Ft9xyS1RN2bYJuDNGeRlXPQaX8QZliiWidfE0WsZuujEzq7hSNN10S8+c7w46ve/Mt3QoktFn4SMrOdb//0pzGZeHa/RmZhXnRG9mVnFO9GZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFO9GZmFedEb2ZWcU70ZmYV50RvNkpI6pO0UNICSXfmcTtJmifpt/nvjnm8JH1R0mJJd0ua2t3obTic6M1GlzdExH4RMS0PzwFujogpwM15GOAIYEp+nQCc3/FIrWWc6M1Gt5nApfn9pcA7CuMvy489vw3YQdLEbgRowzeqH1NsG0jqA1YB64C1ETFN0k7A1UAP0Ae8JyJWSBLwBWAG8AxwbETc1Y24rSkB3CQpgK9ExAXAhIhYBhARyyS9JM+7G/BwYdkledyy4golnUCq8TNhwgR6e3vXT5uwLcx+1dpBAyrO306rV6/u2Gc1otPxONFb0RsiYnlhuHZYf6akOXn4FDY+rD+IdFh/UKeDtaYdEhFLczKfJ+m+QeZVnXGxyYj0Y3EBwLRp02L69Onrp33pius5e+HgKabvqOmDTm+V3t5eirF1W6fjcdONDcaH9RUSEUvz38eB64ADgcdqZZf/Pp5nXwJMLiw+CVjauWitlVyjt5pRd1hfhsP5TsUgaRywRUSsyu/fBHwKmAscA5yZ/16fF5kLfFTSVaSjtZW1fcFGHid6qxl1h/VlOJzvYAwTgOvS6RXGAldGxPck3QFcI+l44CHg3Xn+G0nnYBaTzsMc14kgrT2c6A3Y+LBe0kaH9bk278P6ESwiHgReXWf8H4DD6owPYFYHQrMOGLKNXtJkSbdIWiTpXkkn5fG+0aIiJI2TtH3tPemw/h42HNbDpof1R+eyPhgf1puVWiMnY9cCsyNib+BgYJakffCNFlUyAbhV0q+AXwDfjYjvkdpt3yjpt8Ab8zCkw/oHSYf1FwIf6XzIZtaoIZtuck2tdkJulaRFpBNvM4HpebZLgV7SpXfrr8gAbpO0Q+3wv/XhWyv4sN6s2pq6vFJSD/Aa4Hb6XZEBDHVFhpmZdUHDJ2MlbQdcC5wcEU/ns/d1Z60zbpMrMga79K5Tl5x18vK+MlzKZ2ajU0OJXtKWpCR/RUR8K48e1hUZg11616lLzo6d891Bp7fy8r4yXMpnZqNTI1fdCLgIWBQRnytM8hUZZmYjQCM1+kOA9wMLJS3I404jXYFR2hsteoaorZuZjRaNXHVzK/Xb3cFXZJiZlZ4famZmVnFO9GZmFedEb2ZWcU70ZqPAIM+sOkPSI7nD8AWSZhSWOTU/s+p+SW/uXvQ2XH56pdnoUHtm1V35AXbzJc3L086JiP8qzpyfZ3Uk8EpgV+AHkl4eEes6GrW1hGv0ZqNARCyr9esbEauA2jOrBjITuCoino+I35Eulz6w/ZFaOzjRm40y/Z5ZBaknqbslXVx73Dh+ZlWluOnGbBSp88yq84FPk55H9WngbOADtOCZVWXoLrKmbM+a6nQ8TvSDaOTu2r4z39KBSMyGr94zqyLiscL0C4Eb8uCwn1lVhu4ia8r2rKlOx+OmG7NRYKBnVuUHEta8k9SzGKRnVh0paWtJe5A6EvpFp+K11nKN3mx0GOiZVe+VtB+pWaYP+BBARNwr6Rrg16Qrdmb5ipuRy4nebBQY5JlVNw6yzGeBz7YtKOsYN92YmVWcE72ZWcU50ZuZVZwTvZlZxflk7DANda29r7M3s25zjd7MrOJcozezrvERcWeUMtEvfGQlx3oHGPWcBMxao5SJvhGNPIemDGpxzn7V2ro/Xk5WZtZuIzbRm5n5wYON8clYM7OKc6I3M6s4N910mQ89zazd2pLoJR0OfAEYA3w1Is5sx+dY95ShjP0j2V5lKGNrjZYnekljgHOBN5J6qblD0tyI+HWrP2u0KNtlhi7j6nMZV0s7avQHAosj4kEASVeRepT3DtImXajZVqaMW3GZbkWPGipTxkPp1D5Q/Jx6l1u3cz9qR6Kv13v8Qf1nKnYqDKyWdH9h8nhgeRti65oTu7xNOmuTUbsPY3UjpozrbHdRS2IY4jOG0s7/w4gv42H+b4vraOv+1myc9fLBZm5rQ2XcjkTfUO/xxU6FN1mBdGdETGt1YN1UsW2qRBk7hkFVooxryhQLdD6edlxe2VDv8TaiuYyrz2VcIe1I9HcAUyTtIWkr4EhSj/LWJpJ6JX2wgx9ZyTKWNF3Skm7HURKVKGNJR0m6qTAckvbsZkzd0PJEHxFrgY8C3wcWAddExL1NrqbuoSCApD5Jz0paLekxSV+TtN0wQu6UAbdpKJLOkHR5K4MZjnaXcas0sK+0PYYGlCGGTYyUMq6R9FpJP5O0UtKTkn4q6YCIuCIi3tRILJK2knS2pCV5n/mdpHPaFHJHy10RmzS7lZqkPuCDEfEDSbuRdsQbImJOdyNrD0ljgU8Ae0bE3w8wTy9weUR8tZOxlV2z+4qk6aT/46TORWnDJemFwEPAh4FrgK2A1wGPRsTd/eYNYEpELK6zntOBQ4H3AstIJzpfHxGXtXcL2m9EPwIhIh4B/hfYV9JxkhZJWiXpQUkfqs0nabykGyQ9lX/tfyJpizztFEmP5OXul3RYHr+FpDmSHpD0B0nXSNopT+vJh4DHSHpI0nJJ/1r4vG0lXSppRY7p48UmAUm7SrpW0hO51nBiYdoZkr4p6XJJTwPH9t9uSW+UdF+uvfw39U+cWUG/fWWnXLtfmsvo2/WWKZT/Kkm/lvTOwrQ9Jf0ol8FySVfn8ZJ0jqTH87S7Je3bma0ctV4OEBHfiIh1EfFsRNwUEXdLOlbSrf3mn5FzxHJJ/1nLBcABwHURsTSSvmKSz0eIp+Z9YUXeh7bp0DYOy4hO9JImAzOAXwKPA28FXggcB5wjaWqedTbp5NLOwATgNCAk7UU6PD0gIrYH3gz05WVOBN4B/B9gV2AF6QaSotcCewGHAf9P0t55/OlAD/Ay0g0n62vieaf6DvAr0iVshwEnS3pzYb0zgW8COwBX9Nvm8cC1pFr+eOAB4JCh/1ujW7995evAC4BXAi8BBjo8f4BUM3wR8EngckkT87RPAzcBO5JOVH4pj38T8HpS8tkB+DvgDy3eHNvYb4B1uXJ1hKQdh5j/ncA0YCrpu/aBPP424GOSPiLpVZLqVaCOIuWJvyCV8SdasgXtFhGleQGHA/cDi4E5A8zTB6wGngJ+D5wHbFtnvm8DJ+X3nwKuJzV/FOfZk/QD8TfAlv2mLQIOKwxPBP5EuiS1h3Sp2aTC9F8AR+b3fwJ+BywA7gQ+CDwCzCMdYj4L7FhY9lTga/n9GcCP+8VyBqlJAeBo4LbCNJF+xD7Y7fJrZTm36HMeBtYC64A/Aj/J5fhn4Bbgt7lMdszzTwdW5bjuBqb2W98CYGZ+fxmpnXVSv3kOJSWeg4Et8rgxpB+YG/LwHsDt+fOvBrbK47fOw4vz9J5ul1UryrMT2wXsTWq2eS5/N58mJeFjgVsL8wWwJpflAlKl6+ZCOc0Cfgo8T7rK6JjCsn3APxaGZwAPDBJTH7CwlgfqTBfwxYH2t5b+f7q9kxQ2egypBvUyUhvbr4B9Bvjn/U2d8UeQfpGfJP0I/BH4dJ62PXA28GB+zSks9z7gVlKN/Spg1zz+mbyzPFV4PUeqhffkHWZsYT295GSbp/11YdqbSQlkDvAeUuJ5rrDeVcCNed4zgCv6bdsZbEj0c4D/6Tf954yQRN9oObfosx6ufTHzPvAbUg37mdo+kP+fZ+X3p+RyESlRP5C/pLVyWgscn+fdBbgwJ4N7gQ8UPvdEYD7wBOnH4FTgSjYk+mvYUCn4MvDh/P4jwJfz+yOBq7tdXq0oz05tF+lHfCrwCuCu/P09jU0TfW9h+C3Aojrr2paU9NcBe+dxfcBbCvO8Enh2kHj6gPGDTJ9Bak6s7W+3t6ucytR0s/6W64j4IynpzmxkQUlbk5oz/guYEBE7ADeS264jYlVEzI6IlwFvIx2eHZanXRkRryWdeAmgdn/aw8AREbFD4bVNpLbeoawlNffUTAa2AS7N6/098PvCerePiBmF+Qc7Q76MwvXN+fBy8sCzl85ml/NmWEeqLRERq0hHaZC+xNfl95eSmuggNYGtifQtXEaqeZ8OvDjvU/ewYZ96NCL+ISJ2BT4EnKd82V5EfDEi9iclgleR7hz9Kqwvr0NJTXP9P39mHiZPP2yA5oMyaaQ8O7JdEbEsIu6KiPuAi/Poes042xbev5Q69wdEauc/l1QB3Kcwqfhdq7tsE2YCl0VyG7BDoWmwpcqU6Ovdcr1bg8tuRTo8fAJYK+kIUlspAJLemk+eifQrv47UpreXpEPzD8VzpCaVdXmxLwOflbR7XsfOkhpNSGuAr0laIOnjpPMAYyJiGamJZwUwOZ+0HSNpX0kHNLju7wKvlPR/la7IOZFUuxwphlPOm01SD/AaUg1qLXB6bstdzoYf5Z3ZUP7jyEdtefnjgH0L63u3pNrVOSvyvOskHSDpIElbkvaDPUht+X/O874YeCrS5Yuw8fav/9/k6Svz/GXWSHm2dbskvULS7Fp55PMxx5Ly2wN1Fpkq6R5JtwD/QmpWQtLJSvdSbCtprKRjSEeCvywsO0vSJKULM06rLTuAAG6SNF/pURH9dey7UKZE39At1/Xk2tqJpEPiFaTmmOLNHVOAH5Da9n8OnBcRvaQfhzNJX/ZHSSfmTsvLfCGv4yZJq0jNQps862MA+5POEexBOj9wR21bImId6ahiLKkdfzmptveiBrd1OfDuHPcf8rb9tMG4ymCzy3mzPzBdO38tcHJEPE1quvkTcB/pHM3WmwSUntL4EKlm+BipZl78Px8A3C5pNWk/OSkifke6GOBC0n64lFSxmF0Mp06I0cC0smok5nZv1yrSd/N2SWtI5wF2Jx1pPVdn/lNJJ+P3J9X4L8rjnyU18T5K+l7OAv428oPdsitJP9y1ZuDPDBLXIRExldSsPEvS6/tN71x5t6tNqNkX8FfA9wvDpwKndjuuFmzXGaQk8wwwMY+bCNzf7dhGQzkDW5Kun/9YYdz99coC+Arw3nrzbeZn/zupltZHSh7PkK6iWk4+v1P8f+Q4/yq/H5vnU7fLbLjl2cntqlfeQ8zfxyDt6HXm3eT8YIPLngH8c79xLd3fBnuVqUZflVuux+VmokNyTfLtpC/DbcAxebZjSFcBjUYdK+fcVHcR6WTb5wqT5lK/LOYCR+dr4Q8GVkZqbtssEXFqREyKiB7Sdv4wIo4iXfHzrgE+vxbXu/L8Za/RN1KeHdmuQcq7OM8utfMDkg4ktWq0/PLXnAe2r70nNSXf02+2lu5vg+p2jaDfL9wM0pURDwD/2u14NnMbXkZ6ZvezpHbZlaTDwV2Am0mX1N0M7NTtWKtezqT7HIJ06VrtcroZpPbhTcqCdCh9bo5rITCthbFMZ8NVNy8jnatZDPwPsHUev00eXpynv6zbZbW55Ulqsnx7J7drkPL+RzZcffVR0lVSvyJVvv66ifX30WCNPpfxr/Lr3sL/pRhL2/a3/q8R9wgEMzNrTpmabszMrA3a0jl4s8aPHx89PT3rh9esWcO4ceO6F9AwjYT458+fvzwidu7U543UMh4pccKmsbqMW69s29RwGXe7fS8i2H///aPolltuiZFsJMRPnVuy2/kaqWU8UuKM2DRWl3HrlW2bGi1jN92YmVVcKZpu+lv4yMpNekgfSer18N5t7exh3tqvp4H96ZLDy9OkAI19j71fdoZr9GZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFO9LZe7gTll5JuyMN7SLpd0m8lXZ2fToikrfPw4jy9p5txW2Mk9UlamDvEuTOP20nSvFzG82oda+cnKn4xl/HdkqZ2N3obDid6KzqJDd3tQepW8ZyImELqSOP4PP54YEVE7Amcw4buF6383hAR+0XEtDw8h9Q59hTSkzzn5PFHkDq1mULqCvH8jkdqLeNEbwDkbtjeQnX7NrX6imXZv4w70p+ptV8p74y1rvg88HFSH5nQRN+mkmp9gC4vrjD3k3kCwIQJE+jt7V0/bfXq1RsNl1VZ4pz9qrVDztNArLU+TAP4SkRcAEyI3NlFRCyT9JI870D9mW7UMcZgZTxh26HjLsP/thll2R+a5URvSHor8HhEzJc0vTa6zqxN9W2aE8kFANOmTYvp06evn9bb20txuKzKEmcjj9S45PBxQ8V6SEQszcl8nqT7Bpl32GX8pSuu5+yFg6eYvqMGjbd0yrI/NMuJ3gAOAd4uaQapN6AXkmr4O0gam2v1k0idXUOq3U0GlkgaS+rY/MnOh23NiIil+e/jkq4DDgQekzQx1+YnkjpLhw1lXFMsfxth3EZvxOjo23RUG6QP0470n2vd5Rq9DeYU4CpJnwF+Sep4mfz365IWk2ryR3YpPmvcBOC6fM58LHBlRHxP0h3ANZKOBx4C3p3nv5HU3+pi4BnguM6HbK3iRG8biYheoDe/f5B0eN9/nufYkBBsBMhl+eo64/8AHFZnfACzOhCadYCbbszMKs6J3sys4pzozcwqzonezKzinOjNzCrOid7MrOKGdXmlpD5gFbAOWBsR0yTtBFwN9AB9wHsiYsXwwjQzs83Vihp9o489NTOzLmhH081Ajz01M7MuGO6dsc089nQjw328aZmVMf6R+GhVM2uN4Sb6Zh57upHhPt60zGa/am3p4h9pj4M1s9YZVtNN8bGnwEaPPQXo99hTMzPrgs1O9Jvx2FMzM+uC4bQvNPvYUzMz64LNTvTNPvbUzLpH0mTgMmAX4M/ABRHxBUlnAP8APJFnPS0ibszLnAocT7pP5sSI+H7HA7eW8J2xhqTJkm6RtEjSvZJOyuN3kjRP0m/z3x3zeEn6oqTFku6WNLW7W2ANWAvMjoi9gYOBWZL2ydPOyffC7FdI8vuQOpR5JXA4cJ6kMd0I3IavXJeGWLfUksBd+bzLfEnzgGNJN7+dKWkO6ea3U4AjgCn5dRBwfv7bsIWPrByyw+u+M9/S7HbYAPIlz7XLnldJWgTsNsgiM4GrIuJ54He5N7EDgZ+3PVhrOdfojYhYFhF35fergFoSGOjmt5nAZZHcRupEfGKHw7bNJKkHeA1wex710XxkdnHtqI1U/g8XFlvC4D8MVmKu0dtG+iWBgW5+GygJbNR59HBviivDTV6rV68uRRyN3IDXSKyStgOuBU6OiKclnQ98mnTz46eBs4EPAKqz+CYdwFehjJtRlv2hWU70tl6dJDDgrHXGbZIEhntTXBlu8urt7aUYd7cM1cwFcMnh4waNVdKWpPK9IiK+BRARjxWmXwjckAeXAJMLi08ClvZfZxXKuBll2R+a5URvQP0kQL75Ldfmize/NZQEqqAq5xKUfrUvAhZFxOcK4yfWjtqAd5LuhYF0P8yVkj4H7Eo6H/OLDoZsLeQ2ehswCTDwzW9zgaPz1TcHAysLycLK6RDg/cChkhbk1wzgPyQtlHQ38AbgnwAi4l7gGuDXwPeAWRGxrkux2zC5Rm+wIQkslLQgjzsNOJP6N7/dCMwAFgPPAMd1NlxrVkTcSv0mtxsHWeazwGfbFhTQU4GjpZHAid4GSwJQ5+a3iAhgVluDMrOWcdONmVnFOdGbmVWcE72ZWcU50ZuZVZwTvZlZxTnRm5lVnBO9mVnFOdGbmVWcE72ZWcU50ZuZVZwTvZlZxTnRm5lVnBO9mVnFOdGbmVVcWxK9pMMl3S9psaQ57fgM6y6XcfW5jKuj5c+jlzQGOBd4I6nLuTskzY2IX7f6s6w7XMbVV5YydsckrdGOjkcOBBZHxIMAkq4CZpK6JLNqcBlX34go46F+CGDoH4NWrKPs2pHodwMeLgwvAQ7qP5OkE4AT8uBqSfcXJo8Hlrchto44sYTx66xNRu0+jNV1pIzrxNwNIyVO3nDWJrGWvow7oRXlU1hHKbapoKEybkeir9clXWwyIuIC4IK6K5DujIhprQ6sU0Z6/A0YNWU8UuKElsc6asq4GSN1m9pxMnYJMLkwPAlY2obPse5xGVefy7hC2pHo7wCmSNpD0lbAkcDcNnxOaUj6sqR/a8N6z5B0eavX2wKjroxHIZdxhbS86SYi1kr6KPB9YAxwcUTc2+Rq6h4KNkvSa4H/AF4JrAMWASdHxB2tWH9NRPxjv1Etib+sylTGHTBS4oQWxjrKyrgZI3KbFLFJs1slSHoh8BDwYeAaYCvgdcCjEXF3E+sR6f/057YEOvhnnwHsGRF/3+nPNrPqqPKdsS8HiIhvRMS6iHg2Im6KiLv7N4lI6pEUksbm4V5Jn5X0U+AZ4DRJdxZXLumfJM3N7y+R9Jn8fpGktxbmGytpuaSpefhgST+T9JSkX0maXph3D0k/krRK0jzSGX4zs2GpcqL/DbBO0qWSjpC0Y5PLv5902dj2wJeAvSRNKUx/H3BlneW+Aby3MPxmYHlE3CVpN+C7wGeAnYB/Bq6VtHOe90pgPinBfxo4psmYzcw2UbpE36rbriPiaeC1pEvCLgSekDRX0oQGV3FJRNwbEWsjYiVwPTmBS1oCTAVOLdT0t8m18A8Afydp1zz+fcBTkhYDdwK3RcSNEfHniJiXx82Q9FLgAODfIuL5iPgx8J3N3f6yknSxpMcl3dPtWAYjabKkW/IR2r2STup2TPVI2kbSL/LR4b2SPtntmKCaj0+Q1CdpoaQF/Y/wy65Uib5w2/URwD7AeyXts7nri4hFEXFsREwC9gV2BT7f4OIP9xu+kg019XHAtRHx6sI1tX8N3BwRPcAy4HxJLwDeCTwPTAF+BhyRm22ekvQU6cdoYo5tRUSsKXzm75vY3JHiEuDwbgfRgLXA7IjYGzgYmDWcfbGNngcOjYhXA/sBh0s6uJsBtfp7XDJviIj9Rtq19KVK9BRuu46IPwK1266HLSLuIyWZfYE1wAsKk3ept0i/4ZuA8ZL2A7YDru03fS/g0vz+QuBQUuyrgC9HOut9J/A0sHdE7JBf4yLiTNKPw46SxhXW+dLmtrL88pHKk92OYygRsSwi7srvV5Gu2Nqtu1FtKpLVeXDL/Or2FRZt+x7b5ilboq932/VmfbkkvULSbEmT8vBkUo38NmAB8HpJL5X0IuDUodYXEWuBbwL/Sfq/nSJpfr4FHGBcRCzL7y8k/Rh8GHiisE2XA9sCfytpTD7sni5pUkT8nvRD8ElJW+VLQ9+2OdturSWpB3gNcHt3I6kv70sLgMeBeRHR7Thb9j0umQBu6ve9HxHKlugbuu26QatIz+a4XdIaUoK/h3Q4Pg+4GribdPLzhgZ+8mmHAAAIOElEQVTXeSXwN8BlETGVdGg6C9io3T8n/LWk5pxHC+Mfzp95HBt+AP6FDeXwvhzzk8DpwGVNbbG1nKTa0dvJ+bxP6eSryvYj3b16oKR9uxxSK7/HZXJI8Xsv6fXdDqhR7XjWzXC07LbriHgEeM8g02eRknTNhYVp0wdY5icUduKIeFzSdcBq4GWSJkbEMkkTgQcjYi9JX2HjbXohML1Q+y+u/0HStf5WApK2JCX5KyLiW92OZygR8ZSkXtI5kG6e7K7k4xMiYmn+W/veHwj8uLtRNaZsNfrS33YtaZyk7WvvgTeRvlRz2XA55DGkq3TI449WcjCwsl6St3LJN8pdBCyKiM91O56BSNpZ0g75/bakI877uhtV+b/HzRrkez8ilKpG36LbrtttAnBdygOMBa6MiO9JugO4RtLxpDty353nvxGYASwm3Xx1XOdDLg9J3wCmk05sLwFOj4iLuhtVXYeQ7qVYmNu/AU6LiBu7GFM9E4FL85UuWwDXRESjTZFtMUK+x82q+73vbkiNq+wjEMzMLClb042ZmbVYKZpuxo8fHz09PeuH16xZw7hx4wZeoERGSqz945w/f/7yiNh5kEXMrCJKkeh7enq4884NdxT39vYyffr07gXUhJESa/84JVXxrlszq8NNN2ZmFVeKGn1/Cx9ZybFD9Mw+0ntlNzPrFNfozcwqzonezKzinOjNzCrOid7MrOKc6M3MKs6J3sys4pzozcwqzonezKzinOjNzCrOid7MrOKc6M3MKs6J3sys4hpO9JLGSPqlpBvy8B6Sbpf0W0lX574hkbR1Hl6cp/e0J3QzM2tEMzX6k4BFheGzgHMiYgqwAjg+jz8eWBERewLn5PnMzKxLGkr0kiYBbwG+mocFHAp8M89yKfCO/H5mHiZPPyzPb2ZmXdDo8+g/D3wc2D4Pvxh4KiLW5uElwG75/W7Aw7C+N/iVef7lxRVKOgE4AWDChAn09vaunzZhW5j9qrUMpjh/N61evbo0sQxmpMRpZq03ZKKX9Fbg8YiYL2l6bXSdWaOBaRtGRFwAXAAwbdq0KHZz96UrrufshYOH1nfU9EGnd8pI7UrQzEaPRmr0hwBvlzQD2AZ4IamGv4OksblWPwlYmudfAkwGlkgaC7wIeLLlkZuZWUOGbKOPiFMjYlJE9ABHAj+MiKOAW4B35dmOAa7P7+fmYfL0H0bEJjV6MzPrjOFcR38K8DFJi0lt8Bfl8RcBL87jPwbMGV6IZmY2HE11Dh4RvUBvfv8gcGCdeZ4D3t2C2MzMrAWaSvRWTj1zvjvkPJccPq4DkZhZGfkRCGZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFO9GZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhU3ZKKXNFnSLZIWSbpX0kl5/E6S5uXOwedJ2jGPl6Qv5s7B75Y0td0bYWZmA2ukRr8WmB0RewMHA7Mk7UN6/PDNuXPwm9nwOOIjgCn5dQJwfsujNjOzhjXS8ciyiLgrv18FLCL1C1vsBLx/5+CXRXIbqSeqiS2P3MzMGtLUY4ol9QCvAW4HJkTEMkg/BpJekmdb3zl4Vus4fFm/dblz8BYZ6n8F5YjTzLqj4UQvaTvgWuDkiHhaqtcHeJq1zjh3Dt5Gxzb4PPpux2lm3dHQVTeStiQl+Ssi4lt59GO1Jpn89/E8vtY5eE2x43AzM+uwRq66Eakf2EUR8bnCpGIn4P07Bz86X31zMLCy1sRjZmad10jTzSHA+4GFkhbkcacBZwLXSDoeeIgN/cTeCMwAFgPPAMe1NGIzM2vKkIk+Im6lfrs7wGF15g9g1jDjMjOzFvGdsWZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFO9GZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFtSfSSDpd0f+4gfM7QS5iZWbu0PNFLGgOcS+okfB/gvbkzcTMz64J21OgPBBZHxIMR8UfgKlKH4WZm1gVNdQ7eoHqdgx/Uf6Zi5+DAakn3FyaPB5YP9iE6a5hRts6QsZbBG87aJM7duxWLmXVWOxJ9052Db7IC6c6ImNbqwNphpMQ6UuI0s9ZrR9ONOwc3MyuRdiT6O4ApkvaQtBVwJKnDcDMz64KWN91ExFpJHwW+D4wBLo6Ie5tcTd0mnZIaKbGOlDjNrMWU+vI2M7Oq8p2xZmYV50RvZlZxXU30Qz0qQdLWkq7O02+X1NP5KBuK81hJT0hakF8f7FKcF0t6XNI9A0yXpC/m7bhb0tROx2hmnde1RN/goxKOB1ZExJ7AOUDHb5Nq4pEOV0fEfvn11Y4GucElwOGDTD8CmJJfJwDndyAmM+uybtboG3lUwkzg0vz+m8BhkurdkNVOI+aRDhHxY+DJQWaZCVwWyW3ADpImdiY6M+uWbib6eo9K2G2geSJiLbASeHFHoqsTQ1YvToC/zc0h35Q0uc70Mmh0W8ysQrqZ6Bt5VEJDj1Nos0Zi+A7QExF/CfyADUchZVOG/6eZdVg3E30jj0pYP4+kscCLGLxpoh2GjDMi/hARz+fBC4H9OxRbs/x4CrNRqJuJvpFHJcwFjsnv3wX8MDp/h9eQcfZr5347sKiD8TVjLnB0vvrmYGBlRCzrdlBm1l7teHplQwZ6VIKkTwF3RsRc4CLg65IWk2ryR5Y0zhMlvR1Ym+M8ttNxAkj6BjAdGC9pCXA6sCVARHwZuBGYASwGngGO60acZtZZfgSCmVnF+c5YM7OKc6I3M6s4J3ozs4pzojczqzgnejOzinOiNzOrOCd6M7OK+/9kivWuWu7JewAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.hist();" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassAgeSibSpParchFare
PassengerId1.000000-0.005007-0.0351440.033207-0.057527-0.0016520.012658
Survived-0.0050071.000000-0.338481-0.069809-0.0353220.0816290.257307
Pclass-0.035144-0.3384811.000000-0.3313390.0830810.018443-0.549500
Age0.033207-0.069809-0.3313391.000000-0.232625-0.1791910.091566
SibSp-0.057527-0.0353220.083081-0.2326251.0000000.4148380.159651
Parch-0.0016520.0816290.018443-0.1791910.4148381.0000000.216225
Fare0.0126580.257307-0.5495000.0915660.1596510.2162251.000000
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass Age SibSp Parch \\\n", + "PassengerId 1.000000 -0.005007 -0.035144 0.033207 -0.057527 -0.001652 \n", + "Survived -0.005007 1.000000 -0.338481 -0.069809 -0.035322 0.081629 \n", + "Pclass -0.035144 -0.338481 1.000000 -0.331339 0.083081 0.018443 \n", + "Age 0.033207 -0.069809 -0.331339 1.000000 -0.232625 -0.179191 \n", + "SibSp -0.057527 -0.035322 0.083081 -0.232625 1.000000 0.414838 \n", + "Parch -0.001652 0.081629 0.018443 -0.179191 0.414838 1.000000 \n", + "Fare 0.012658 0.257307 -0.549500 0.091566 0.159651 0.216225 \n", + "\n", + " Fare \n", + "PassengerId 0.012658 \n", + "Survived 0.257307 \n", + "Pclass -0.549500 \n", + "Age 0.091566 \n", + "SibSp 0.159651 \n", + "Parch 0.216225 \n", + "Fare 1.000000 " + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [], + "source": [ + "mapping = {'male' : 0, 'female' : 1}" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedAge_Group
0103Braund, Mr. Owen Harris022.010A/5 211717.2500NaNSAdult
1211Cumings, Mrs. John Bradley (Florence Briggs Th...138.010PC 1759971.2833C85CAdult
2313Heikkinen, Miss. Laina126.000STON/O2. 31012827.9250NaNSAdult
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)135.01011380353.1000C123SAdult
4503Allen, Mr. William Henry035.0003734508.0500NaNSAdult
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp Parch \\\n", + "0 Braund, Mr. Owen Harris 0 22.0 1 0 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... 1 38.0 1 0 \n", + "2 Heikkinen, Miss. Laina 1 26.0 0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 35.0 1 0 \n", + "4 Allen, Mr. William Henry 0 35.0 0 0 \n", + "\n", + " Ticket Fare Cabin Embarked Age_Group \n", + "0 A/5 21171 7.2500 NaN S Adult \n", + "1 PC 17599 71.2833 C85 C Adult \n", + "2 STON/O2. 3101282 7.9250 NaN S Adult \n", + "3 113803 53.1000 C123 S Adult \n", + "4 373450 8.0500 NaN S Adult " + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Sex = df.Sex.replace(mapping)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [], + "source": [ + "Age_Group = pd.get_dummies(df.Age_Group)\n", + "df = pd.concat([df, Age_Group], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [], + "source": [ + "Sex = pd.get_dummies(df.Sex)\n", + "df = pd.concat([df, Sex], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 0\n", + "Name: Survived, dtype: int64" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y= df.Survived\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSexAgeSibSpParchChildAdultSeniors01
03022.01001010
11138.01001001
23126.00001001
31135.01001001
43035.00001010
\n", + "
" + ], + "text/plain": [ + " Pclass Sex Age SibSp Parch Child Adult Seniors 0 1\n", + "0 3 0 22.0 1 0 0 1 0 1 0\n", + "1 1 1 38.0 1 0 0 1 0 0 1\n", + "2 3 1 26.0 0 0 0 1 0 0 1\n", + "3 1 1 35.0 1 0 0 1 0 0 1\n", + "4 3 0 35.0 0 0 0 1 0 1 0" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = df.drop(columns= ['Survived','Name','PassengerId','Age_Group','Ticket','Cabin','Fare','Embarked'])\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: Survived R-squared: 0.398
Model: OLS Adj. R-squared: 0.394
Method: Least Squares F-statistic: 83.49
Date: Mon, 23 Jul 2018 Prob (F-statistic): 5.04e-93
Time: 08:36:02 Log-Likelihood: -395.67
No. Observations: 891 AIC: 807.3
Df Residuals: 883 BIC: 845.7
Df Model: 7
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
Pclass -0.1798 0.016 -10.929 0.000 -0.212 -0.147
Sex 0.4919 0.030 16.514 0.000 0.433 0.550
Age -0.0040 0.001 -2.841 0.005 -0.007 -0.001
SibSp -0.0460 0.013 -3.537 0.000 -0.072 -0.020
Parch -0.0248 0.018 -1.367 0.172 -0.060 0.011
Child 0.4141 0.050 8.347 0.000 0.317 0.511
Adult 0.2834 0.038 7.481 0.000 0.209 0.358
Seniors 0.2631 0.124 2.124 0.034 0.020 0.506
0 0.4687 0.059 7.936 0.000 0.353 0.585
1 0.4919 0.030 16.514 0.000 0.433 0.550
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 37.345 Durbin-Watson: 1.938
Prob(Omnibus): 0.000 Jarque-Bera (JB): 41.020
Skew: 0.520 Prob(JB): 1.24e-09
Kurtosis: 3.155 Cond. No. 3.66e+16


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 7.03e-28. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Survived R-squared: 0.398\n", + "Model: OLS Adj. R-squared: 0.394\n", + "Method: Least Squares F-statistic: 83.49\n", + "Date: Mon, 23 Jul 2018 Prob (F-statistic): 5.04e-93\n", + "Time: 08:36:02 Log-Likelihood: -395.67\n", + "No. Observations: 891 AIC: 807.3\n", + "Df Residuals: 883 BIC: 845.7\n", + "Df Model: 7 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "Pclass -0.1798 0.016 -10.929 0.000 -0.212 -0.147\n", + "Sex 0.4919 0.030 16.514 0.000 0.433 0.550\n", + "Age -0.0040 0.001 -2.841 0.005 -0.007 -0.001\n", + "SibSp -0.0460 0.013 -3.537 0.000 -0.072 -0.020\n", + "Parch -0.0248 0.018 -1.367 0.172 -0.060 0.011\n", + "Child 0.4141 0.050 8.347 0.000 0.317 0.511\n", + "Adult 0.2834 0.038 7.481 0.000 0.209 0.358\n", + "Seniors 0.2631 0.124 2.124 0.034 0.020 0.506\n", + "0 0.4687 0.059 7.936 0.000 0.353 0.585\n", + "1 0.4919 0.030 16.514 0.000 0.433 0.550\n", + "==============================================================================\n", + "Omnibus: 37.345 Durbin-Watson: 1.938\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 41.020\n", + "Skew: 0.520 Prob(JB): 1.24e-09\n", + "Kurtosis: 3.155 Cond. No. 3.66e+16\n", + "==============================================================================\n", + "\n", + "Warnings:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "[2] The smallest eigenvalue is 7.03e-28. This might indicate that there are\n", + "strong multicollinearity problems or that the design matrix is singular.\n", + "\"\"\"" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = sm.OLS(y, x)\n", + "results = model.fit()\n", + "results.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [], + "source": [ + "# linear regression on the modeling dataset\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", + " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", + " verbose=0, warm_start=False)" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n", + " 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n", + " 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n", + " 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n", + " 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8169491525423729" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSexAgeSibSpParchChildAdultSeniors01
7093029.6991181101010
4392031.0000000001010
8403020.0000000001010
720216.0000000110001
393114.0000001010001
\n", + "
" + ], + "text/plain": [ + " Pclass Sex Age SibSp Parch Child Adult Seniors 0 1\n", + "709 3 0 29.699118 1 1 0 1 0 1 0\n", + "439 2 0 31.000000 0 0 0 1 0 1 0\n", + "840 3 0 20.000000 0 0 0 1 0 1 0\n", + "720 2 1 6.000000 0 1 1 0 0 0 1\n", + "39 3 1 14.000000 1 0 1 0 0 0 1" + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/.pytest_cache/v/cache/nodeids b/.pytest_cache/v/cache/nodeids new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/.pytest_cache/v/cache/nodeids @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/__pycache__/assessment.cpython-36.pyc b/__pycache__/assessment.cpython-36.pyc new file mode 100644 index 0000000..352e3d5 Binary files /dev/null and b/__pycache__/assessment.cpython-36.pyc differ diff --git a/alice.txt b/alice.txt new file mode 100644 index 0000000..84bf3cc --- /dev/null +++ b/alice.txt @@ -0,0 +1,17 @@ +Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversations?' +So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. +There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge. +In another moment down went Alice after it, never once considering how in the world she was to get out again. +The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well. +Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it. +'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.) +Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles I've fallen by this time?' she said aloud. 'I must be getting somewhere near the centre of the earth. Let me see: that would be four thousand miles down, I think—' (for, you see, Alice had learnt several things of this sort in her lessons in the schoolroom, and though this was not a VERY good opportunity for showing off her knowledge, as there was no one to listen to her, still it was good practice to say it over) '—yes, that's about the right distance—but then I wonder what Latitude or Longitude I've got to?' (Alice had no idea what Latitude was, or Longitude either, but thought they were nice grand words to say.) +Presently she began again. 'I wonder if I shall fall right THROUGH the earth! How funny it'll seem to come out among the people that walk with their heads downward! The Antipathies, I think—' (she was rather glad there WAS no one listening, this time, as it didn't sound at all the right word) '—but I shall have to ask them what the name of the country is, you know. Please, Ma'am, is this New Zealand or Australia?' (and she tried to curtsey as she spoke—fancy CURTSEYING as you're falling through the air! Do you think you could manage it?) 'And what an ignorant little girl she'll think me for asking! No, it'll never do to ask: perhaps I shall see it written up somewhere.' +Down, down, down. There was nothing else to do, so Alice soon began talking again. 'Dinah'll miss me very much to-night, I should think!' (Dinah was the cat.) 'I hope they'll remember her saucer of milk at tea-time. Dinah my dear! I wish you were down here with me! There are no mice in the air, I'm afraid, but you might catch a bat, and that's very like a mouse, you know. But do cats eat bats, I wonder?' And here Alice began to get rather sleepy, and went on saying to herself, in a dreamy sort of way, 'Do cats eat bats? Do cats eat bats?' and sometimes, 'Do bats eat cats?' for, you see, as she couldn't answer either question, it didn't much matter which way she put it. She felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and saying to her very earnestly, 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! thump! down she came upon a heap of sticks and dry leaves, and the fall was over. +Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, but it was all dark overhead; before her was another long passage, and the White Rabbit was still in sight, hurrying down it. There was not a moment to be lost: away went Alice like the wind, and was just in time to hear it say, as it turned a corner, 'Oh my ears and whiskers, how late it's getting!' She was close behind it when she turned the corner, but the Rabbit was no longer to be seen: she found herself in a long, low hall, which was lit up by a row of lamps hanging from the roof. +There were doors all round the hall, but they were all locked; and when Alice had been all the way down one side and up the other, trying every door, she walked sadly down the middle, wondering how she was ever to get out again. +Suddenly she came upon a little three-legged table, all made of solid glass; there was nothing on it except a tiny golden key, and Alice's first thought was that it might belong to one of the doors of the hall; but, alas! either the locks were too large, or the key was too small, but at any rate it would not open any of them. However, on the second time round, she came upon a low curtain she had not noticed before, and behind it was a little door about fifteen inches high: she tried the little golden key in the lock, and to her great delight it fitted! +Alice opened the door and found that it led into a small passage, not much larger than a rat-hole: she knelt down and looked along the passage into the loveliest garden you ever saw. How she longed to get out of that dark hall, and wander about among those beds of bright flowers and those cool fountains, but she could not even get her head through the doorway; 'and even if my head would go through,' thought poor Alice, 'it would be of very little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I could, if I only knew how to begin.' For, you see, so many out-of-the-way things had happened lately, that Alice had begun to think that very few things indeed were really impossible. +There seemed to be no use in waiting by the little door, so she went back to the table, half hoping she might find another key on it, or at any rate a book of rules for shutting people up like telescopes: this time she found a little bottle on it, ('which certainly was not here before,' said Alice,) and round the neck of the bottle was a paper label, with the words 'DRINK ME' beautifully printed on it in large letters. +It was all very well to say 'Drink me,' but the wise little Alice was not going to do THAT in a hurry. 'No, I'll look first,' she said, 'and see whether it's marked "poison" or not'; for she had read several nice little histories about children who had got burnt, and eaten up by wild beasts and other unpleasant things, all because they WOULD not remember the simple rules their friends had taught them: such as, that a red-hot poker will burn you if you hold it too long; and that if you cut your finger VERY deeply with a knife, it usually bleeds; and she had never forgotten that, if you drink much from a bottle marked 'poison,' it is almost certain to disagree with you, sooner or later. +However, this bottle was NOT marked 'poison,' so Alice ventured to taste it, and finding it very nice, (it had, in fact, a sort of mixed flavour of cherry-tart, custard, pine-apple, roast turkey, toffee, and hot buttered toast,) she very soon finished it off. diff --git a/assessment.py b/assessment.py index 281675d..256eb54 100644 --- a/assessment.py +++ b/assessment.py @@ -5,138 +5,72 @@ # PYTHON SECTION def count_characters(string): - ''' - INPUT: STRING - OUTPUT: DICT (with counts of each character in input string) + d = {} + + for char in string: + if not (char in d): + d[char] = 1 + else: + d[char] = d[char] + 1 + + return d - Return a dictionary which contains - a count of the number of times each character appears in the string. - Characters which with a count of 0 should not be included in the - output dictionary. - ''' - pass def invert_dictionary(d): - ''' - INPUT: DICT - OUTPUT: DICT (of sets of input keys indexing the same input values - indexed by the input values) + d1= dict() + d2 = dict() + + for key, val in d.items(): + if val in d1.keys(): + d1[val].append(key) + else: + d1[val]=list(key) + + return d1 - Given a dictionary d, return a new dictionary with d's values - as keys and the value for a given key being - the set of d's keys which shared the same value. - e.g. {'a': 2, 'b': 4, 'c': 2} => {2: {'a', 'c'}, 4: {'b'}} - ''' - pass def word_count(filename): - ''' - INPUT: STRING - OUTPUT: INT, INT, INT (a tuple with line, word, - and character count of named INPUT file) - - The INPUT filename is the name of a text file. - The OUTPUT is a tuple containting (in order) - the following stats for the text file: - 1. number of lines - 2. number of words (broken by whitespace) - 3. number of characters - ''' - pass + with open('alice.txt', 'r') as f: + lines, words, chars = 0, 0, 0 + for line in f: + lines += 1 + words += len(line.split()) + chars += len(line) + return (lines, words, chars) def matrix_multiplication(A, B): - ''' - INPUT: LIST (of length n) OF LIST (of length n) OF INTEGERS, - LIST (of length n) OF LIST (of length n) OF INTEGERS - OUTPUT: LIST OF LIST OF INTEGERS - (storing the product of a matrix multiplication operation) - - Return the matrix which is the product of matrix A and matrix B - where A and B will be (a) integer valued (b) square matrices - (c) of size n-by-n (d) encoded as lists of lists. - - For example: - A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]] corresponds to the matrix - - | 2 3 4 | - | 6 4 2 | - |-1 2 0 | - - Please do not use numpy. Write your solution in straight python. - ''' - pass + + results = [[0]*len(B[0]) for x in range(len(A))] + for A_row in range(len(A)): + for B_column in range(len(B[0])): + for B_row in range(len(B)): + results[A_row][B_column] += A[A_row][B_row] * B[B_row][B_column] + return results # NumPy SECTION def array_work(rows, cols, scalar, matrixA): - ''' - INPUT: INT, INT, INT, NUMPY ARRAY - OUTPUT: NUMPY ARRAY - (of matrix product of r-by-c matrix of "scalar"'s time matrixA) - - Create matrix of size (rows, cols) with elements initialized to the scalar - value. Right multiply that matrix with the passed matrixA (i.e. AB, not - BA). Return the result of the multiplication. You needn't check for - matrix compatibililty, but you accomplish this in a single line. - - E.g., array_work(2, 3, 5, [[3, 4], [5, 6], [7, 8]]) - [[3, 4], [[5, 5, 5], - [5, 6], * [5, 5, 5]] - [7, 8]] - ''' - pass - + return(matrixA.dot(np.full((rows,cols),scalar))) + def boolean_indexing(arr, minimum): - ''' - INPUT: NUMPY ARRAY, INT - OUTPUT: NUMPY ARRAY - (of just elements in "arr" greater or equal to "minimum") - - Return an array of only the elements of "arr" that are greater than or - equal to "minimum" - - Ex: - In [1]: boolean_indexing([[3, 4, 5], [6, 7, 8]], 7) - Out[1]: array([7, 8]) - ''' - pass + + return arr[arr >= minimum] # Pandas SECTION def make_series(start, length, index): - ''' - INPUTS: INT, INT, LIST (of length "length") - OUTPUT: PANDAS SERIES (of "length" sequential integers - beginning with "start" and with index "index") - - Create a pandas Series of length "length" with index "index" - and with elements that are sequential integers starting from "start". - You may assume the length of index will be "length". - - E.g., - In [1]: make_series(5, 3, ['a', 'b', 'c']) - Out[1]: - a 5 - b 6 - c 7 - dtype: int64 - ''' - pass + return pd.Series(range(start, start + length), index) def data_frame_work(df, colA, colB, colC): - ''' - INPUT: DATAFRAME, STR, STR, STR - OUTPUT: None - - Insert a column (colC) into the dataframe that is the sum of colA and colB. - Assume that df contains columns colA and colB and that these are numeric. - ''' - pass + df[colC] = df[colA] + df[colB] + + return df + diff --git a/grad_school.ipynb b/grad_school.ipynb new file mode 100644 index 0000000..bb63ede --- /dev/null +++ b/grad_school.ipynb @@ -0,0 +1,446 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import r2_score\n", + "from pandas.plotting import scatter_matrix\n", + "from sklearn.linear_model import LogisticRegression\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
admitgregparank
003803.613
116603.673
218004.001
316403.194
405202.934
\n", + "
" + ], + "text/plain": [ + " admit gre gpa rank\n", + "0 0 380 3.61 3\n", + "1 1 660 3.67 3\n", + "2 1 800 4.00 1\n", + "3 1 640 3.19 4\n", + "4 0 520 2.93 4" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df= pd.read_csv('grad.txt')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 400 entries, 0 to 399\n", + "Data columns (total 4 columns):\n", + "admit 400 non-null int64\n", + "gre 400 non-null int64\n", + "gpa 400 non-null float64\n", + "rank 400 non-null int64\n", + "dtypes: float64(1), int64(3)\n", + "memory usage: 12.5 KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 0\n", + "Name: admit, dtype: int64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y = df.admit\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFGlJREFUeJzt3X9sXed93/H3t5KTKOYqOVHCCpI2eouQzjX7wyYMt8aKy7hbFTuwA9RBHXiJFLgQtmRptqhwlA6osQFBXWxu2qRbCzX2rCyuaddJa9V2unmOOSNA7E5yftCOklpwNUeyKiW1zZSJ0UDtd3/co5VVKPHccw51eZ++XwDBe855znOe73nID889vJeMzESSVK4fGPYAJEkry6CXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFW7tsAcAsHHjxpyYmGi073e+8x0uvPDCbgc0JNayOpVSSyl1gLWcdvDgwW9l5huWa7cqgn5iYoIDBw402nd2dpZer9ftgIbEWlanUmoppQ6wltMi4v/WaeetG0kqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKtyqeGesRsfEnodqtds9eYqdNdvWceS2azvrS/r7xit6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYVbNugj4s6IOBkRTy+x7ZciIiNiY7UcEfGxiDgcEV+JiMtWYtCSpPrqXNHfBWw/c2VEbAX+OfD8otVvBbZVH7uA324/RElSG8sGfWY+Dry4xKaPArcAuWjd9cAns+8JYENEbOpkpJKkRhrdo4+I64BjmfnlMzZtBr6xaPlotU6SNCSRmcs3ipgAHszMSyPitcBjwL/IzPmIOAJMZea3IuIh4Fcz8/PVfo8Ct2TmwSX63EX/9g7j4+OXz8zMNCpgYWGBsbGxRvuuNqNQy9yx+VrtxtfBiVe6O+7k5vXddTagUZiXOkqpA6zltOnp6YOZObVcuyZ/j/6fABcDX44IgC3AUxFxBf0r+K2L2m4BXliqk8zcC+wFmJqayl6v12AoMDs7S9N9V5tRqKXu35jfPXmK2+e6+3cHR27qddbXoEZhXuoopQ6wlkENfOsmM+cy842ZOZGZE/TD/bLM/HNgP/Du6tU3VwLzmXm82yFLkgZR5+WV9wBfAN4cEUcj4uZzNH8YeA44DPwu8N5ORilJamzZ59aZ+c5ltk8sepzA+9oPS5LUFd8ZK0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBWuzj8HvzMiTkbE04vW/aeI+FpEfCUi/iAiNiza9uGIOBwRX4+In12pgUuS6qlzRX8XsP2MdY8Al2bmjwJ/CnwYICIuAW4EfqTa579GxJrORitJGtja5Rpk5uMRMXHGuv+5aPEJ4Ibq8fXATGb+FfBnEXEYuAL4Qiejlf4emdjzUKf97Z48xc4afR657dpOj6vhi8xcvlE/6B/MzEuX2PZHwL2Z+amI+C3gicz8VLXtDuCzmXn/EvvtAnYBjI+PXz4zM9OogIWFBcbGxhrtu9qMQi1zx+ZrtRtfByde6e64k5vXd9fZgIY1L3XPdV1152SY57quUfheqatNLdPT0wczc2q5dste0Z9LRPx74BRw9+lVSzRb8idJZu4F9gJMTU1lr9drNIbZ2Vma7rvajEItda4IoX/1ePtcqy+vv+PITb3O+hrUsOal7rmuq+6cDPNc1zUK3yt1nY9aGn8nRsQO4G3A1fm3TwuOAlsXNdsCvNB8eJKkthq9vDIitgMfAq7LzO8u2rQfuDEiXh0RFwPbgD9pP0xJUlPLXtFHxD1AD9gYEUeBW+m/yubVwCMRAf378v8qM5+JiPuAr9K/pfO+zPzrlRq8JGl5dV51884lVt9xjvYfAT7SZlCSpO74zlhJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYVbNugj4s6IOBkRTy9a97qIeCQinq0+X1Stj4j4WEQcjoivRMRlKzl4SdLy6lzR3wVsP2PdHuDRzNwGPFotA7wV2FZ97AJ+u5thSpKaWjboM/Nx4MUzVl8P7Kse7wPevmj9J7PvCWBDRGzqarCSpME1vUc/npnHAarPb6zWbwa+sajd0WqdJGlIIjOXbxQxATyYmZdWyy9n5oZF21/KzIsi4iHgVzPz89X6R4FbMvPgEn3uon97h/Hx8ctnZmYaFbCwsMDY2FijfVebUahl7th8rXbj6+DEK90dd3Lz+u46G9Cw5qXuua6r7pwM81zXNQrfK3W1qWV6evpgZk4t125to97hRERsyszj1a2Zk9X6o8DWRe22AC8s1UFm7gX2AkxNTWWv12s0kNnZWZruu9qMQi079zxUq93uyVPcPtf0y+v7Hbmp11lfgxrWvNQ913XVnZNhnuu6RuF7pa7zUUvTWzf7gR3V4x3AA4vWv7t69c2VwPzpWzySpOFY9sd7RNwD9ICNEXEUuBW4DbgvIm4GngfeUTV/GLgGOAx8F3jPCoxZkjSAZYM+M995lk1XL9E2gfe1HZQkqTu+M1aSCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuFaBX1E/LuIeCYino6IeyLiNRFxcUQ8GRHPRsS9EfGqrgYrSRpc46CPiM3ALwJTmXkpsAa4Efg14KOZuQ14Cbi5i4FKkpppe+tmLbAuItYCrwWOA28B7q+27wPe3vIYkqQWGgd9Zh4D/jPwPP2AnwcOAi9n5qmq2VFgc9tBSpKai8xstmPERcCngZ8HXgZ+v1q+NTPfVLXZCjycmZNL7L8L2AUwPj5++czMTKNxLCwsMDY21mjf1WYUapk7Nl+r3fg6OPFKd8ed3Ly+u84GNKx5qXuu66o7J8M813WNwvdKXW1qmZ6ePpiZU8u1W9uo976fAf4sM78JEBGfAX4K2BARa6ur+i3AC0vtnJl7gb0AU1NT2ev1Gg1idnaWpvuuNqNQy849D9Vqt3vyFLfPtfny+ruO3NTrrK9BDWte6p7ruurOyTDPdV2j8L1S1/mopc09+ueBKyPitRERwNXAV4HHgBuqNjuAB9oNUZLURpt79E/S/6XrU8Bc1dde4EPAByPiMPB64I4OxilJaqjVc+vMvBW49YzVzwFXtOlXktQd3xkrSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUuO7+M4QkjaiJjv/JyyDu2n7hih/DK3pJKpxBL0mFM+glqXAGvSQVzqCXpMK1CvqI2BAR90fE1yLiUET8ZES8LiIeiYhnq88XdTVYSdLg2l7R/ybwx5n5w8CPAYeAPcCjmbkNeLRaliQNSePX0UfEDwI/DewEyMzvAd+LiOuBXtVsHzALfKjNIM9l7tg8O4f0Gtgjt107lONK0iDaXNH/Y+CbwH+LiC9GxCci4kJgPDOPA1Sf39jBOCVJDUVmNtsxYgp4ArgqM5+MiN8Evg28PzM3LGr3UmZ+3336iNgF7AIYHx+/fGZmptE4Tr44z4lXGu3a2uTm9Z32t7CwwNjYWKd9dm3u2HytduPr6HReuj7XgxjWvNQ913XVnZNhnuu6up6Trs/1IC5ev6ZxLdPT0wczc2q5dm2C/oeAJzJzolr+Z/Tvx78J6GXm8YjYBMxm5pvP1dfU1FQeOHCg0Tg+fvcD3D43nL/k0PWtm9nZWXq9Xqd9dq3uW8V3T57qdF6GeZtsWPPS9dvy687JKNyS7HpOhv0nEJrWEhG1gr7xrZvM/HPgGxFxOsSvBr4K7Ad2VOt2AA80PYYkqb22l1zvB+6OiFcBzwHvof/D476IuBl4HnhHy2NIklpoFfSZ+SVgqacNV7fpV5LUHd8ZK0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcK2DPiLWRMQXI+LBavniiHgyIp6NiHur/ycrSRqSLq7oPwAcWrT8a8BHM3Mb8BJwcwfHkCQ11CroI2ILcC3wiWo5gLcA91dN9gFvb3MMSVI7ba/ofwO4Bfibavn1wMuZeapaPgpsbnkMSVILkZnNdox4G3BNZr43InrALwHvAb6QmW+q2mwFHs7MySX23wXsAhgfH798Zmam0ThOvjjPiVca7dra5Ob1nfa3sLDA2NhYp312be7YfK124+vodF66PteDGNa81D3XddWdk2Ge67q6npOuz/UgLl6/pnEt09PTBzNzarl2axv13ncVcF1EXAO8BvhB+lf4GyJibXVVvwV4YamdM3MvsBdgamoqe71eo0F8/O4HuH2uTRnNHbmp12l/s7OzND0P58vOPQ/Vard78lSn89L1uR7EsOal7rmuq+6cDPNc19X1nHR9rgdx1/YLV/zrq/Gtm8z8cGZuycwJ4Ebgc5l5E/AYcEPVbAfwQOtRSpIaW4nX0X8I+GBEHKZ/z/6OFTiGJKmmTp5bZ+YsMFs9fg64oot+JUnt+c5YSSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXCNgz4itkbEYxFxKCKeiYgPVOtfFxGPRMSz1eeLuhuuJGlQba7oTwG7M/OfAlcC74uIS4A9wKOZuQ14tFqWJA1J46DPzOOZ+VT1+C+BQ8Bm4HpgX9VsH/D2toOUJDUXmdm+k4gJ4HHgUuD5zNywaNtLmfl9t28iYhewC2B8fPzymZmZRsc++eI8J15ptGtrk5vXd9rfwsICY2NjnfbZtblj87Xaja+j03np+lwPYljzUvdc11V3ToZ5ruvqek66PteDuHj9msa1TE9PH8zMqeXatQ76iBgD/jfwkcz8TES8XCfoF5uamsoDBw40Ov7H736A2+fWNtq3rSO3Xdtpf7Ozs/R6vU777NrEnodqtds9earTeen6XA9iWPNS91zXVXdOhnmu6+p6Tro+14O4a/uFjWuJiFpB3+pVNxFxAfBp4O7M/Ey1+kREbKq2bwJOtjmGJKmdNq+6CeAO4FBm/vqiTfuBHdXjHcADzYcnSWqrzXPrq4B3AXMR8aVq3S8DtwH3RcTNwPPAO9oNUZLURuOgz8zPA3GWzVc37VeS1C3fGStJhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXArFvQRsT0ivh4RhyNiz0odR5J0bisS9BGxBvgvwFuBS4B3RsQlK3EsSdK5rdQV/RXA4cx8LjO/B8wA16/QsSRJ57BSQb8Z+Mai5aPVOknSeRaZ2X2nEe8AfjYzf6FafhdwRWa+f1GbXcCuavHNwNcbHm4j8K0Ww11NrGV1KqWWUuoAazntH2XmG5ZrtLZh58s5CmxdtLwFeGFxg8zcC+xte6CIOJCZU237WQ2sZXUqpZZS6gBrGdRK3br5P8C2iLg4Il4F3AjsX6FjSZLOYUWu6DPzVET8G+B/AGuAOzPzmZU4liTp3Fbq1g2Z+TDw8Er1v0jr2z+riLWsTqXUUkodYC0DWZFfxkqSVg//BIIkFW5kgj4i7oyIkxHx9Fm2R0R8rPqTC1+JiMvO9xjrqFFHLyLmI+JL1cevnO8x1hURWyPisYg4FBHPRMQHlmiz6uelZh0jMS8R8ZqI+JOI+HJVy39Yos2rI+Leak6ejIiJ8z/S5dWsZWdEfHPRvPzCMMZaR0SsiYgvRsSDS2xb2TnJzJH4AH4auAx4+izbrwE+CwRwJfDksMfcsI4e8OCwx1mzlk3AZdXjfwD8KXDJqM1LzTpGYl6q8zxWPb4AeBK48ow27wV+p3p8I3DvsMfdopadwG8Ne6w16/kg8HtLfR2t9JyMzBV9Zj4OvHiOJtcDn8y+J4ANEbHp/Iyuvhp1jIzMPJ6ZT1WP/xI4xPe/A3rVz0vNOkZCdZ4XqsULqo8zfxF3PbCvenw/cHVExHkaYm01axkJEbEFuBb4xFmarOicjEzQ11DSn134yerp6mcj4keGPZg6qqeaP0H/qmuxkZqXc9QBIzIv1S2CLwEngUcy86xzkpmngHng9ed3lPXUqAXg56rbgvdHxNYltq8GvwHcAvzNWbav6JyUFPRL/fQbxZ/+T9F/W/OPAR8H/nDI41lWRIwBnwb+bWZ++8zNS+yyKudlmTpGZl4y868z88fpvyP9ioi49IwmIzMnNWr5I2AiM38U+F/87VXxqhERbwNOZubBczVbYl1nc1JS0C/7ZxdGQWZ++/TT1ey/F+GCiNg45GGdVURcQD8c787MzyzRZCTmZbk6Rm1eADLzZWAW2H7Gpv8/JxGxFljPKr+deLZaMvMvMvOvqsXfBS4/z0Or4yrguog4Qv8v+b4lIj51RpsVnZOSgn4/8O7qVR5XAvOZeXzYgxpURPzQ6XtzEXEF/Tn6i+GOamnVOO8ADmXmr5+l2aqflzp1jMq8RMQbImJD9Xgd8DPA185oth/YUT2+AfhcVr8FXE3q1HLG73uuo//7lVUlMz+cmVsyc4L+L1o/l5n/8oxmKzonK/bO2K5FxD30X/mwMSKOArfS/+UMmfk79N+Few1wGPgu8J7hjPTcatRxA/CvI+IU8Apw42r8JqxcBbwLmKvuowL8MvAPYaTmpU4dozIvm4B90f/nPz8A3JeZD0bEfwQOZOZ++j/U/ntEHKZ/1Xjj8IZ7TnVq+cWIuA44Rb+WnUMb7YDO55z4zlhJKlxJt24kSUsw6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKtz/AwukaMeTkQuqAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df['rank'].hist();" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 273\n", + "1 127\n", + "Name: admit, dtype: int64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.admit.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gregparank
03803.613
16603.673
28004.001
36403.194
45202.934
\n", + "
" + ], + "text/plain": [ + " gre gpa rank\n", + "0 380 3.61 3\n", + "1 660 3.67 3\n", + "2 800 4.00 1\n", + "3 640 3.19 4\n", + "4 520 2.93 4" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = df.drop(columns=['admit'])\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "# linear regression on the modeling dataset\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(268, 3)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", + " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", + " verbose=0, warm_start=False)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " dtype=int64)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7575757575757576" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_test,y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/testing.py b/testing.py index c68b010..a350f0b 100644 --- a/testing.py +++ b/testing.py @@ -1,28 +1,35 @@ + +import assessment as a +import numpy as np +import pandas as pd +import pytest + + def test_count_characters(self): string = "abafdcggfaabe" answer = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2} result = a.count_characters(string) - self.assertEqual(result, answer) + assert result == answer -def test_invert_dictionary(self): +def test_invert_dictionary(): d = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2} result = {4: {'a'}, 2: {'b', 'f', 'g'}, 1: {'c', 'd', 'e'}} - self.assertEqual(a.invert_dictionary(d), result) + assert a.invert_dictionary(d) == result -def test_word_count(self): - self.assertEqual(a.word_count('data/alice.txt'), (17, 1615, 8449)) +def test_word_count(): + assert a.word_count('data/alice.txt') == (17, 1615, 8461) -def test_matrix_multiplication(self): +def test_matrix_multiplication(): A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]] B = [[8, -3, 1], [-7, 3, 2], [0, 3, 3]] answer = [[-5, 15, 20], [20, 0, 20], [-22, 9, 3]] - self.assertEqual(a.matrix_multiplication(A, B), answer) + assert a.matrix_multiplication(A, B) == answer -def test_array_work(self): +def test_array_work(): matrixA = np.array([[-4, -2], [0, -3], [-4, -1], @@ -34,7 +41,7 @@ def test_array_work(self): [0, 0, 0], [-12, -12, -12]]) result1 = a.array_work(2, 3, 4, matrixA) - self.assertTrue(np.all(answer1 == result1)) + assert np.all(answer1 == result1) answer2 = np.array([[-36, -36], [-18, -18], @@ -42,36 +49,39 @@ def test_array_work(self): [0, 0], [-18, -18]]) result2 = a.array_work(2, 2, 6, matrixA) - self.assertTrue(np.all(answer2 == result2)) + assert np.all(answer1 == result2) +def test_boolean_indexing(self): + arr = np.array([[-4, -4, -3], + [-1, 16, -4], + [-3, 6, 4]]) + result1 = a.boolean_indexing(arr, 0) + answer1 = np.array([16, 6, 4]) + assert np.all(result1 == answer1) + result2 = a.boolean_indexing(arr, 10) + answer2 = np.array([16]) + assert np.all(result2 == answer2) -def test_make_series(self): +def test_make_series(): result = a.make_series(7, 4, ['a', 'b', 'c', 'd']) - self.assertTrue(isinstance(result, pd.Series)) - self.assertEqual(result['a'], 7) - self.assertEqual(result['d'], 10) + assert (isinstance(result, pd.Series)) + assert result['a'] == 7 + assert result['d'] == 10 result = a.make_series(22, 5, ['a', 'b', 'c', 'd', 'hi']) - self.assertEqual(result['a'], 22) - self.assertEqual(result['d'], 25) - self.assertEqual(result['hi'], 26) + assert result['a'] == 22 + assert result['d'] == 25 + assert result['hi'] == 26 + def test_data_frame_work(self): df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) colA, colB, colC = ('a', 'b', 'c') a.data_frame_work(df, colA, colB, colC) - self.assertTrue(colC in df.columns.tolist()) - self.assertEqual(df[colC].tolist(), [5, 7, 9]) + assert colC in df.columns.tolist() + assert df[colC].tolist() == [5, 7, 9] + + -def test_boolean_indexing(self): - arr = np.array([[-4, -4, -3], - [-1, 16, -4], - [-3, 6, 4]]) - result1 = a.boolean_indexing(arr, 0) - answer1 = np.array([16, 6, 4]) - self.assertTrue(np.all(result1 == answer1)) - result2 = a.boolean_indexing(arr, 10) - answer2 = np.array([16]) - self.assertTrue(np.all(result2 == answer2)) diff --git a/titanic_survival.ipynb b/titanic_survival.ipynb new file mode 100644 index 0000000..ea74d15 --- /dev/null +++ b/titanic_survival.ipynb @@ -0,0 +1,1458 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import r2_score\n", + "from pandas.plotting import scatter_matrix\n", + "from sklearn.linear_model import LogisticRegression\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('train.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 66.2+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "age_mean = df.Age.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "df['Age'] = df.Age.fillna(age_mean)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
PassengerId891.0446.000000257.3538421.00223.5000446.000000668.5891.0000
Survived891.00.3838380.4865920.000.00000.0000001.01.0000
Pclass891.02.3086420.8360711.002.00003.0000003.03.0000
Age891.029.69911813.0020150.4222.000029.69911835.080.0000
SibSp891.00.5230081.1027430.000.00000.0000001.08.0000
Parch891.00.3815940.8060570.000.00000.0000000.06.0000
Fare891.032.20420849.6934290.007.910414.45420031.0512.3292
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% 75% \\\n", + "PassengerId 891.0 446.000000 257.353842 1.00 223.5000 446.000000 668.5 \n", + "Survived 891.0 0.383838 0.486592 0.00 0.0000 0.000000 1.0 \n", + "Pclass 891.0 2.308642 0.836071 1.00 2.0000 3.000000 3.0 \n", + "Age 891.0 29.699118 13.002015 0.42 22.0000 29.699118 35.0 \n", + "SibSp 891.0 0.523008 1.102743 0.00 0.0000 0.000000 1.0 \n", + "Parch 891.0 0.381594 0.806057 0.00 0.0000 0.000000 0.0 \n", + "Fare 891.0 32.204208 49.693429 0.00 7.9104 14.454200 31.0 \n", + "\n", + " max \n", + "PassengerId 891.0000 \n", + "Survived 1.0000 \n", + "Pclass 3.0000 \n", + "Age 80.0000 \n", + "SibSp 8.0000 \n", + "Parch 6.0000 \n", + "Fare 512.3292 " + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe().T" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedAge_GroupChildAdultSeniors01
0103Braund, Mr. Owen Harris022.010A/5 211717.2500NaNSAdult01010
1211Cumings, Mrs. John Bradley (Florence Briggs Th...138.010PC 1759971.2833C85CAdult01001
2313Heikkinen, Miss. Laina126.000STON/O2. 31012827.9250NaNSAdult01001
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)135.01011380353.1000C123SAdult01001
4503Allen, Mr. William Henry035.0003734508.0500NaNSAdult01010
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp Parch \\\n", + "0 Braund, Mr. Owen Harris 0 22.0 1 0 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... 1 38.0 1 0 \n", + "2 Heikkinen, Miss. Laina 1 26.0 0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 35.0 1 0 \n", + "4 Allen, Mr. William Henry 0 35.0 0 0 \n", + "\n", + " Ticket Fare Cabin Embarked Age_Group Child Adult Seniors \\\n", + "0 A/5 21171 7.2500 NaN S Adult 0 1 0 \n", + "1 PC 17599 71.2833 C85 C Adult 0 1 0 \n", + "2 STON/O2. 3101282 7.9250 NaN S Adult 0 1 0 \n", + "3 113803 53.1000 C123 S Adult 0 1 0 \n", + "4 373450 8.0500 NaN S Adult 0 1 0 \n", + "\n", + " 0 1 \n", + "0 1 0 \n", + "1 0 1 \n", + "2 0 1 \n", + "3 0 1 \n", + "4 1 0 " + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "age_cat = ['Child','Adult','Seniors']\n", + "grouping = [0,17,65,100]\n", + "df['Age_Group'] = pd.cut(df['Age'], bins=grouping, labels=age_cat)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XuYXVV9//H3h4SbAQUMhkAigyUiiBVDuLSovwhVIV6iv6pFqVzEYjU+QE0rgdofeHsKbREvBRQEAQGBikhEqkRkVFQQgpGAAQ04Qki4REJIwkUTv78/1jrJzuTMzDmZc9mz5/N6nvPM2dfz3bP2+Z61174sRQRmZlZdW3Q7ADMzay8nejOzinOiNzOrOCd6M7OKc6I3M6s4J3ozs4pzojcza4CkXkkf7HYcm8OJfhC5YFdI2rrbsVhrSOqT9Kyk1YXXrt2Oy4anX7k+Julrkrbrdlxl4UQ/AEk9wOuAAN7e1WCs1d4WEdsVXkubWVjSmHYFZsPytojYDpgKHAB8opmFJY1tS1Ql4EQ/sKOB24BLgGNqIyW9WNJ3JD0t6Q5Jn5F0a2H6KyTNk/SkpPslvafzoVszJG0h6ZuSHpX0VD6S27sw/XJJ50r6nqQ1wOskbSPpc5IezjXI8yRt08XNsCwiHgH+F9hX0nGSFklaJelBSR+qzSdpuqQlkk6R9CjwtTx+pqQF+Tv+gKTDC6vfXdJP8/pukjS+s1u3eZzoB3Y0cEV+vVnShDz+XGANsAvpB6D4IzAOmAdcCbwEeC9wnqRXdjBu2zw3AFNI5XoP8PV+098HfBLYHvg58F/AHsBf5uV6gH/tUKw2CEmTgRnAL4HHgbcCLwSOA86RNLUw+y7ATsDuwAmSDgQuA/4F2AF4PdBXmP99eT0vAbYC/rmd29IyEeFXvxfwWuBPwPg8fB/wT8CYPH6vwryfAW7N7/8O+Em/dX0FOL3b2+TX+vLoA1YDT+XXt+vMM57UZDcuD18OXFyYvgXwHLB7YdzrgN92e/tG66tfuf4eOA/Yts583wZOyu+nA38EtilM/wpwzgCf0Qt8ojD8EeB73d72Rl6VbZMapmOAmyJieR6+Mo/7BjAWeLgwb/H97sBBkp4qjBvLprVD6653RMQPagO5zf3fgXeRkvyf86TxpKM32LicdwG2Bn4laf1q2hmwNWSjcgWQdARwOvBy0g/0C4CFhVmeiIjnCsOTgRsH+YxHC++fAUbECV8n+n4kbQu8BxiT2+0gfal3ACYAa4FJwG/ytMmFxR8GfhQRb+xQuNYaR5MO9Q8l1QZfDDzBxsm7+JjXx0g1wb0i4rFOBWnNyVfLXUsq3+sj4k+Svs3A5QrpO/wXHQqxY9xGv6l3AOuAfYD98mtv4CekHeZbwBmSXiDpFXlczQ3AyyW9X9KW+XVA8cSeldL2wPPAH0g1vs8ONnNErAO+Cnxe0s5KJkl6U/tDtSZsRaqkPQGszbX7ocroIuA4SYflk/S75e/5iOZEv6ljgK9FxEMR8WjtBfw3cBTwUeBFpEO4r5Oac54HiIhVpB3pSGBpnucs0s5m5fU1UnktBe4FftbAMrNJtf9fACuBm0gnZa0k8vfxROAaYAXpROrcIZb5BfmkLalcf0Rqkh3RlE8q2GaSdBawS0QcM+TMZmZd4Bp9k/J18n+ZD9cPBI4Hrut2XGZmA/HJ2OZtT2qu2ZV0je7ZwPVdjcjMbBBuujEzqzg33ZiZVVwpmm7Gjx8fPT0964fXrFnDuHHjuhdQG5Rtm+bPn788Inbu1OeVsYyrHoPLeIMyxQKti6fhMu72rbkRwf777x9Ft9xyS1RN2bYJuDNGeRlXPQaX8QZliiWidfE0WsZuujEzq7hSNN10S8+c7w46ve/Mt3QoktFn4SMrOdb//0pzGZeHa/RmZhXnRG9mVnFO9GZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFO9GZmFedEb2ZWcU70ZmYV50RvNkpI6pO0UNICSXfmcTtJmifpt/nvjnm8JH1R0mJJd0ua2t3obTic6M1GlzdExH4RMS0PzwFujogpwM15GOAIYEp+nQCc3/FIrWWc6M1Gt5nApfn9pcA7CuMvy489vw3YQdLEbgRowzeqH1NsG0jqA1YB64C1ETFN0k7A1UAP0Ae8JyJWSBLwBWAG8AxwbETc1Y24rSkB3CQpgK9ExAXAhIhYBhARyyS9JM+7G/BwYdkledyy4golnUCq8TNhwgR6e3vXT5uwLcx+1dpBAyrO306rV6/u2Gc1otPxONFb0RsiYnlhuHZYf6akOXn4FDY+rD+IdFh/UKeDtaYdEhFLczKfJ+m+QeZVnXGxyYj0Y3EBwLRp02L69Onrp33pius5e+HgKabvqOmDTm+V3t5eirF1W6fjcdONDcaH9RUSEUvz38eB64ADgcdqZZf/Pp5nXwJMLiw+CVjauWitlVyjt5pRd1hfhsP5TsUgaRywRUSsyu/fBHwKmAscA5yZ/16fF5kLfFTSVaSjtZW1fcFGHid6qxl1h/VlOJzvYAwTgOvS6RXGAldGxPck3QFcI+l44CHg3Xn+G0nnYBaTzsMc14kgrT2c6A3Y+LBe0kaH9bk278P6ESwiHgReXWf8H4DD6owPYFYHQrMOGLKNXtJkSbdIWiTpXkkn5fG+0aIiJI2TtH3tPemw/h42HNbDpof1R+eyPhgf1puVWiMnY9cCsyNib+BgYJakffCNFlUyAbhV0q+AXwDfjYjvkdpt3yjpt8Ab8zCkw/oHSYf1FwIf6XzIZtaoIZtuck2tdkJulaRFpBNvM4HpebZLgV7SpXfrr8gAbpO0Q+3wv/XhWyv4sN6s2pq6vFJSD/Aa4Hb6XZEBDHVFhpmZdUHDJ2MlbQdcC5wcEU/ns/d1Z60zbpMrMga79K5Tl5x18vK+MlzKZ2ajU0OJXtKWpCR/RUR8K48e1hUZg11616lLzo6d891Bp7fy8r4yXMpnZqNTI1fdCLgIWBQRnytM8hUZZmYjQCM1+kOA9wMLJS3I404jXYFR2hsteoaorZuZjRaNXHVzK/Xb3cFXZJiZlZ4famZmVnFO9GZmFedEb2ZWcU70ZqPAIM+sOkPSI7nD8AWSZhSWOTU/s+p+SW/uXvQ2XH56pdnoUHtm1V35AXbzJc3L086JiP8qzpyfZ3Uk8EpgV+AHkl4eEes6GrW1hGv0ZqNARCyr9esbEauA2jOrBjITuCoino+I35Eulz6w/ZFaOzjRm40y/Z5ZBaknqbslXVx73Dh+ZlWluOnGbBSp88yq84FPk55H9WngbOADtOCZVWXoLrKmbM+a6nQ8TvSDaOTu2r4z39KBSMyGr94zqyLiscL0C4Eb8uCwn1lVhu4ia8r2rKlOx+OmG7NRYKBnVuUHEta8k9SzGKRnVh0paWtJe5A6EvpFp+K11nKN3mx0GOiZVe+VtB+pWaYP+BBARNwr6Rrg16Qrdmb5ipuRy4nebBQY5JlVNw6yzGeBz7YtKOsYN92YmVWcE72ZWcU50ZuZVZwTvZlZxflk7DANda29r7M3s25zjd7MrOJcozezrvERcWeUMtEvfGQlx3oHGPWcBMxao5SJvhGNPIemDGpxzn7V2ro/Xk5WZtZuIzbRm5n5wYON8clYM7OKc6I3M6s4N910mQ89zazd2pLoJR0OfAEYA3w1Is5sx+dY95ShjP0j2V5lKGNrjZYnekljgHOBN5J6qblD0tyI+HWrP2u0KNtlhi7j6nMZV0s7avQHAosj4kEASVeRepT3DtImXajZVqaMW3GZbkWPGipTxkPp1D5Q/Jx6l1u3cz9qR6Kv13v8Qf1nKnYqDKyWdH9h8nhgeRti65oTu7xNOmuTUbsPY3UjpozrbHdRS2IY4jOG0s7/w4gv42H+b4vraOv+1myc9fLBZm5rQ2XcjkTfUO/xxU6FN1mBdGdETGt1YN1UsW2qRBk7hkFVooxryhQLdD6edlxe2VDv8TaiuYyrz2VcIe1I9HcAUyTtIWkr4EhSj/LWJpJ6JX2wgx9ZyTKWNF3Skm7HURKVKGNJR0m6qTAckvbsZkzd0PJEHxFrgY8C3wcWAddExL1NrqbuoSCApD5Jz0paLekxSV+TtN0wQu6UAbdpKJLOkHR5K4MZjnaXcas0sK+0PYYGlCGGTYyUMq6R9FpJP5O0UtKTkn4q6YCIuCIi3tRILJK2knS2pCV5n/mdpHPaFHJHy10RmzS7lZqkPuCDEfEDSbuRdsQbImJOdyNrD0ljgU8Ae0bE3w8wTy9weUR8tZOxlV2z+4qk6aT/46TORWnDJemFwEPAh4FrgK2A1wGPRsTd/eYNYEpELK6zntOBQ4H3AstIJzpfHxGXtXcL2m9EPwIhIh4B/hfYV9JxkhZJWiXpQUkfqs0nabykGyQ9lX/tfyJpizztFEmP5OXul3RYHr+FpDmSHpD0B0nXSNopT+vJh4DHSHpI0nJJ/1r4vG0lXSppRY7p48UmAUm7SrpW0hO51nBiYdoZkr4p6XJJTwPH9t9uSW+UdF+uvfw39U+cWUG/fWWnXLtfmsvo2/WWKZT/Kkm/lvTOwrQ9Jf0ol8FySVfn8ZJ0jqTH87S7Je3bma0ctV4OEBHfiIh1EfFsRNwUEXdLOlbSrf3mn5FzxHJJ/1nLBcABwHURsTSSvmKSz0eIp+Z9YUXeh7bp0DYOy4hO9JImAzOAXwKPA28FXggcB5wjaWqedTbp5NLOwATgNCAk7UU6PD0gIrYH3gz05WVOBN4B/B9gV2AF6QaSotcCewGHAf9P0t55/OlAD/Ay0g0n62vieaf6DvAr0iVshwEnS3pzYb0zgW8COwBX9Nvm8cC1pFr+eOAB4JCh/1ujW7995evAC4BXAi8BBjo8f4BUM3wR8EngckkT87RPAzcBO5JOVH4pj38T8HpS8tkB+DvgDy3eHNvYb4B1uXJ1hKQdh5j/ncA0YCrpu/aBPP424GOSPiLpVZLqVaCOIuWJvyCV8SdasgXtFhGleQGHA/cDi4E5A8zTB6wGngJ+D5wHbFtnvm8DJ+X3nwKuJzV/FOfZk/QD8TfAlv2mLQIOKwxPBP5EuiS1h3Sp2aTC9F8AR+b3fwJ+BywA7gQ+CDwCzCMdYj4L7FhY9lTga/n9GcCP+8VyBqlJAeBo4LbCNJF+xD7Y7fJrZTm36HMeBtYC64A/Aj/J5fhn4Bbgt7lMdszzTwdW5bjuBqb2W98CYGZ+fxmpnXVSv3kOJSWeg4Et8rgxpB+YG/LwHsDt+fOvBrbK47fOw4vz9J5ul1UryrMT2wXsTWq2eS5/N58mJeFjgVsL8wWwJpflAlKl6+ZCOc0Cfgo8T7rK6JjCsn3APxaGZwAPDBJTH7CwlgfqTBfwxYH2t5b+f7q9kxQ2egypBvUyUhvbr4B9Bvjn/U2d8UeQfpGfJP0I/BH4dJ62PXA28GB+zSks9z7gVlKN/Spg1zz+mbyzPFV4PUeqhffkHWZsYT295GSbp/11YdqbSQlkDvAeUuJ5rrDeVcCNed4zgCv6bdsZbEj0c4D/6Tf954yQRN9oObfosx6ufTHzPvAbUg37mdo+kP+fZ+X3p+RyESlRP5C/pLVyWgscn+fdBbgwJ4N7gQ8UPvdEYD7wBOnH4FTgSjYk+mvYUCn4MvDh/P4jwJfz+yOBq7tdXq0oz05tF+lHfCrwCuCu/P09jU0TfW9h+C3Aojrr2paU9NcBe+dxfcBbCvO8Enh2kHj6gPGDTJ9Bak6s7W+3t6ucytR0s/6W64j4IynpzmxkQUlbk5oz/guYEBE7ADeS264jYlVEzI6IlwFvIx2eHZanXRkRryWdeAmgdn/aw8AREbFD4bVNpLbeoawlNffUTAa2AS7N6/098PvCerePiBmF+Qc7Q76MwvXN+fBy8sCzl85ml/NmWEeqLRERq0hHaZC+xNfl95eSmuggNYGtifQtXEaqeZ8OvDjvU/ewYZ96NCL+ISJ2BT4EnKd82V5EfDEi9iclgleR7hz9Kqwvr0NJTXP9P39mHiZPP2yA5oMyaaQ8O7JdEbEsIu6KiPuAi/Poes042xbev5Q69wdEauc/l1QB3Kcwqfhdq7tsE2YCl0VyG7BDoWmwpcqU6Ovdcr1bg8tuRTo8fAJYK+kIUlspAJLemk+eifQrv47UpreXpEPzD8VzpCaVdXmxLwOflbR7XsfOkhpNSGuAr0laIOnjpPMAYyJiGamJZwUwOZ+0HSNpX0kHNLju7wKvlPR/la7IOZFUuxwphlPOm01SD/AaUg1qLXB6bstdzoYf5Z3ZUP7jyEdtefnjgH0L63u3pNrVOSvyvOskHSDpIElbkvaDPUht+X/O874YeCrS5Yuw8fav/9/k6Svz/GXWSHm2dbskvULS7Fp55PMxx5Ly2wN1Fpkq6R5JtwD/QmpWQtLJSvdSbCtprKRjSEeCvywsO0vSJKULM06rLTuAAG6SNF/pURH9dey7UKZE39At1/Xk2tqJpEPiFaTmmOLNHVOAH5Da9n8OnBcRvaQfhzNJX/ZHSSfmTsvLfCGv4yZJq0jNQps862MA+5POEexBOj9wR21bImId6ahiLKkdfzmptveiBrd1OfDuHPcf8rb9tMG4ymCzy3mzPzBdO38tcHJEPE1quvkTcB/pHM3WmwSUntL4EKlm+BipZl78Px8A3C5pNWk/OSkifke6GOBC0n64lFSxmF0Mp06I0cC0smok5nZv1yrSd/N2SWtI5wF2Jx1pPVdn/lNJJ+P3J9X4L8rjnyU18T5K+l7OAv428oPdsitJP9y1ZuDPDBLXIRExldSsPEvS6/tN71x5t6tNqNkX8FfA9wvDpwKndjuuFmzXGaQk8wwwMY+bCNzf7dhGQzkDW5Kun/9YYdz99coC+Arw3nrzbeZn/zupltZHSh7PkK6iWk4+v1P8f+Q4/yq/H5vnU7fLbLjl2cntqlfeQ8zfxyDt6HXm3eT8YIPLngH8c79xLd3fBnuVqUZflVuux+VmokNyTfLtpC/DbcAxebZjSFcBjUYdK+fcVHcR6WTb5wqT5lK/LOYCR+dr4Q8GVkZqbtssEXFqREyKiB7Sdv4wIo4iXfHzrgE+vxbXu/L8Za/RN1KeHdmuQcq7OM8utfMDkg4ktWq0/PLXnAe2r70nNSXf02+2lu5vg+p2jaDfL9wM0pURDwD/2u14NnMbXkZ6ZvezpHbZlaTDwV2Am0mX1N0M7NTtWKtezqT7HIJ06VrtcroZpPbhTcqCdCh9bo5rITCthbFMZ8NVNy8jnatZDPwPsHUev00eXpynv6zbZbW55Ulqsnx7J7drkPL+RzZcffVR0lVSvyJVvv66ifX30WCNPpfxr/Lr3sL/pRhL2/a3/q8R9wgEMzNrTpmabszMrA3a0jl4s8aPHx89PT3rh9esWcO4ceO6F9AwjYT458+fvzwidu7U543UMh4pccKmsbqMW69s29RwGXe7fS8i2H///aPolltuiZFsJMRPnVuy2/kaqWU8UuKM2DRWl3HrlW2bGi1jN92YmVVcKZpu+lv4yMpNekgfSer18N5t7exh3tqvp4H96ZLDy9OkAI19j71fdoZr9GZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFO9LZe7gTll5JuyMN7SLpd0m8lXZ2fToikrfPw4jy9p5txW2Mk9UlamDvEuTOP20nSvFzG82oda+cnKn4xl/HdkqZ2N3obDid6KzqJDd3tQepW8ZyImELqSOP4PP54YEVE7Amcw4buF6383hAR+0XEtDw8h9Q59hTSkzzn5PFHkDq1mULqCvH8jkdqLeNEbwDkbtjeQnX7NrX6imXZv4w70p+ptV8p74y1rvg88HFSH5nQRN+mkmp9gC4vrjD3k3kCwIQJE+jt7V0/bfXq1RsNl1VZ4pz9qrVDztNArLU+TAP4SkRcAEyI3NlFRCyT9JI870D9mW7UMcZgZTxh26HjLsP/thll2R+a5URvSHor8HhEzJc0vTa6zqxN9W2aE8kFANOmTYvp06evn9bb20txuKzKEmcjj9S45PBxQ8V6SEQszcl8nqT7Bpl32GX8pSuu5+yFg6eYvqMGjbd0yrI/NMuJ3gAOAd4uaQapN6AXkmr4O0gam2v1k0idXUOq3U0GlkgaS+rY/MnOh23NiIil+e/jkq4DDgQekzQx1+YnkjpLhw1lXFMsfxth3EZvxOjo23RUG6QP0470n2vd5Rq9DeYU4CpJnwF+Sep4mfz365IWk2ryR3YpPmvcBOC6fM58LHBlRHxP0h3ANZKOBx4C3p3nv5HU3+pi4BnguM6HbK3iRG8biYheoDe/f5B0eN9/nufYkBBsBMhl+eo64/8AHFZnfACzOhCadYCbbszMKs6J3sys4pzozcwqzonezKzinOjNzCrOid7MrOKGdXmlpD5gFbAOWBsR0yTtBFwN9AB9wHsiYsXwwjQzs83Vihp9o489NTOzLmhH081Ajz01M7MuGO6dsc089nQjw328aZmVMf6R+GhVM2uN4Sb6Zh57upHhPt60zGa/am3p4h9pj4M1s9YZVtNN8bGnwEaPPQXo99hTMzPrgs1O9Jvx2FMzM+uC4bQvNPvYUzMz64LNTvTNPvbUzLpH0mTgMmAX4M/ABRHxBUlnAP8APJFnPS0ibszLnAocT7pP5sSI+H7HA7eW8J2xhqTJkm6RtEjSvZJOyuN3kjRP0m/z3x3zeEn6oqTFku6WNLW7W2ANWAvMjoi9gYOBWZL2ydPOyffC7FdI8vuQOpR5JXA4cJ6kMd0I3IavXJeGWLfUksBd+bzLfEnzgGNJN7+dKWkO6ea3U4AjgCn5dRBwfv7bsIWPrByyw+u+M9/S7HbYAPIlz7XLnldJWgTsNsgiM4GrIuJ54He5N7EDgZ+3PVhrOdfojYhYFhF35fergFoSGOjmt5nAZZHcRupEfGKHw7bNJKkHeA1wex710XxkdnHtqI1U/g8XFlvC4D8MVmKu0dtG+iWBgW5+GygJbNR59HBviivDTV6rV68uRRyN3IDXSKyStgOuBU6OiKclnQ98mnTz46eBs4EPAKqz+CYdwFehjJtRlv2hWU70tl6dJDDgrHXGbZIEhntTXBlu8urt7aUYd7cM1cwFcMnh4waNVdKWpPK9IiK+BRARjxWmXwjckAeXAJMLi08ClvZfZxXKuBll2R+a5URvQP0kQL75Ldfmize/NZQEqqAq5xKUfrUvAhZFxOcK4yfWjtqAd5LuhYF0P8yVkj4H7Eo6H/OLDoZsLeQ2ehswCTDwzW9zgaPz1TcHAysLycLK6RDg/cChkhbk1wzgPyQtlHQ38AbgnwAi4l7gGuDXwPeAWRGxrkux2zC5Rm+wIQkslLQgjzsNOJP6N7/dCMwAFgPPAMd1NlxrVkTcSv0mtxsHWeazwGfbFhTQU4GjpZHAid4GSwJQ5+a3iAhgVluDMrOWcdONmVnFOdGbmVWcE72ZWcU50ZuZVZwTvZlZxTnRm5lVnBO9mVnFOdGbmVWcE72ZWcU50ZuZVZwTvZlZxTnRm5lVnBO9mVnFOdGbmVVcWxK9pMMl3S9psaQ57fgM6y6XcfW5jKuj5c+jlzQGOBd4I6nLuTskzY2IX7f6s6w7XMbVV5YydsckrdGOjkcOBBZHxIMAkq4CZpK6JLNqcBlX34go46F+CGDoH4NWrKPs2pHodwMeLgwvAQ7qP5OkE4AT8uBqSfcXJo8Hlrchto44sYTx66xNRu0+jNV1pIzrxNwNIyVO3nDWJrGWvow7oRXlU1hHKbapoKEybkeir9clXWwyIuIC4IK6K5DujIhprQ6sU0Z6/A0YNWU8UuKElsc6asq4GSN1m9pxMnYJMLkwPAlY2obPse5xGVefy7hC2pHo7wCmSNpD0lbAkcDcNnxOaUj6sqR/a8N6z5B0eavX2wKjroxHIZdxhbS86SYi1kr6KPB9YAxwcUTc2+Rq6h4KNkvSa4H/AF4JrAMWASdHxB2tWH9NRPxjv1Etib+sylTGHTBS4oQWxjrKyrgZI3KbFLFJs1slSHoh8BDwYeAaYCvgdcCjEXF3E+sR6f/057YEOvhnnwHsGRF/3+nPNrPqqPKdsS8HiIhvRMS6iHg2Im6KiLv7N4lI6pEUksbm4V5Jn5X0U+AZ4DRJdxZXLumfJM3N7y+R9Jn8fpGktxbmGytpuaSpefhgST+T9JSkX0maXph3D0k/krRK0jzSGX4zs2GpcqL/DbBO0qWSjpC0Y5PLv5902dj2wJeAvSRNKUx/H3BlneW+Aby3MPxmYHlE3CVpN+C7wGeAnYB/Bq6VtHOe90pgPinBfxo4psmYzcw2UbpE36rbriPiaeC1pEvCLgSekDRX0oQGV3FJRNwbEWsjYiVwPTmBS1oCTAVOLdT0t8m18A8Afydp1zz+fcBTkhYDdwK3RcSNEfHniJiXx82Q9FLgAODfIuL5iPgx8J3N3f6yknSxpMcl3dPtWAYjabKkW/IR2r2STup2TPVI2kbSL/LR4b2SPtntmKCaj0+Q1CdpoaQF/Y/wy65Uib5w2/URwD7AeyXts7nri4hFEXFsREwC9gV2BT7f4OIP9xu+kg019XHAtRHx6sI1tX8N3BwRPcAy4HxJLwDeCTwPTAF+BhyRm22ekvQU6cdoYo5tRUSsKXzm75vY3JHiEuDwbgfRgLXA7IjYGzgYmDWcfbGNngcOjYhXA/sBh0s6uJsBtfp7XDJviIj9Rtq19KVK9BRuu46IPwK1266HLSLuIyWZfYE1wAsKk3ept0i/4ZuA8ZL2A7YDru03fS/g0vz+QuBQUuyrgC9HOut9J/A0sHdE7JBf4yLiTNKPw46SxhXW+dLmtrL88pHKk92OYygRsSwi7srvV5Gu2Nqtu1FtKpLVeXDL/Or2FRZt+x7b5ilboq932/VmfbkkvULSbEmT8vBkUo38NmAB8HpJL5X0IuDUodYXEWuBbwL/Sfq/nSJpfr4FHGBcRCzL7y8k/Rh8GHiisE2XA9sCfytpTD7sni5pUkT8nvRD8ElJW+VLQ9+2OdturSWpB3gNcHt3I6kv70sLgMeBeRHR7Thb9j0umQBu6ve9HxHKlugbuu26QatIz+a4XdIaUoK/h3Q4Pg+4GribdPLzhgZ+8mmHAAAIOElEQVTXeSXwN8BlETGVdGg6C9io3T8n/LWk5pxHC+Mfzp95HBt+AP6FDeXwvhzzk8DpwGVNbbG1nKTa0dvJ+bxP6eSryvYj3b16oKR9uxxSK7/HZXJI8Xsv6fXdDqhR7XjWzXC07LbriHgEeM8g02eRknTNhYVp0wdY5icUduKIeFzSdcBq4GWSJkbEMkkTgQcjYi9JX2HjbXohML1Q+y+u/0HStf5WApK2JCX5KyLiW92OZygR8ZSkXtI5kG6e7K7k4xMiYmn+W/veHwj8uLtRNaZsNfrS33YtaZyk7WvvgTeRvlRz2XA55DGkq3TI449WcjCwsl6St3LJN8pdBCyKiM91O56BSNpZ0g75/bakI877uhtV+b/HzRrkez8ilKpG36LbrtttAnBdygOMBa6MiO9JugO4RtLxpDty353nvxGYASwm3Xx1XOdDLg9J3wCmk05sLwFOj4iLuhtVXYeQ7qVYmNu/AU6LiBu7GFM9E4FL85UuWwDXRESjTZFtMUK+x82q+73vbkiNq+wjEMzMLClb042ZmbVYKZpuxo8fHz09PeuH16xZw7hx4wZeoERGSqz945w/f/7yiNh5kEXMrCJKkeh7enq4884NdxT39vYyffr07gXUhJESa/84JVXxrlszq8NNN2ZmFVeKGn1/Cx9ZybFD9Mw+0ntlNzPrFNfozcwqzonezKzinOjNzCrOid7MrOKc6M3MKs6J3sys4pzozcwqzonezKzinOjNzCrOid7MrOKc6M3MKs6J3sys4hpO9JLGSPqlpBvy8B6Sbpf0W0lX574hkbR1Hl6cp/e0J3QzM2tEMzX6k4BFheGzgHMiYgqwAjg+jz8eWBERewLn5PnMzKxLGkr0kiYBbwG+mocFHAp8M89yKfCO/H5mHiZPPyzPb2ZmXdDo8+g/D3wc2D4Pvxh4KiLW5uElwG75/W7Aw7C+N/iVef7lxRVKOgE4AWDChAn09vaunzZhW5j9qrUMpjh/N61evbo0sQxmpMRpZq03ZKKX9Fbg8YiYL2l6bXSdWaOBaRtGRFwAXAAwbdq0KHZz96UrrufshYOH1nfU9EGnd8pI7UrQzEaPRmr0hwBvlzQD2AZ4IamGv4OksblWPwlYmudfAkwGlkgaC7wIeLLlkZuZWUOGbKOPiFMjYlJE9ABHAj+MiKOAW4B35dmOAa7P7+fmYfL0H0bEJjV6MzPrjOFcR38K8DFJi0lt8Bfl8RcBL87jPwbMGV6IZmY2HE11Dh4RvUBvfv8gcGCdeZ4D3t2C2MzMrAWaSvRWTj1zvjvkPJccPq4DkZhZGfkRCGZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFO9GZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhU3ZKKXNFnSLZIWSbpX0kl5/E6S5uXOwedJ2jGPl6Qv5s7B75Y0td0bYWZmA2ukRr8WmB0RewMHA7Mk7UN6/PDNuXPwm9nwOOIjgCn5dQJwfsujNjOzhjXS8ciyiLgrv18FLCL1C1vsBLx/5+CXRXIbqSeqiS2P3MzMGtLUY4ol9QCvAW4HJkTEMkg/BpJekmdb3zl4Vus4fFm/dblz8BYZ6n8F5YjTzLqj4UQvaTvgWuDkiHhaqtcHeJq1zjh3Dt5Gxzb4PPpux2lm3dHQVTeStiQl+Ssi4lt59GO1Jpn89/E8vtY5eE2x43AzM+uwRq66Eakf2EUR8bnCpGIn4P07Bz86X31zMLCy1sRjZmad10jTzSHA+4GFkhbkcacBZwLXSDoeeIgN/cTeCMwAFgPPAMe1NGIzM2vKkIk+Im6lfrs7wGF15g9g1jDjMjOzFvGdsWZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFO9GZmFedEb2ZWcU70ZmYV50RvZlZxTvRmZhXnRG9mVnFtSfSSDpd0f+4gfM7QS5iZWbu0PNFLGgOcS+okfB/gvbkzcTMz64J21OgPBBZHxIMR8UfgKlKH4WZm1gVNdQ7eoHqdgx/Uf6Zi5+DAakn3FyaPB5YP9iE6a5hRts6QsZbBG87aJM7duxWLmXVWOxJ9052Db7IC6c6ImNbqwNphpMQ6UuI0s9ZrR9ONOwc3MyuRdiT6O4ApkvaQtBVwJKnDcDMz64KWN91ExFpJHwW+D4wBLo6Ie5tcTd0mnZIaKbGOlDjNrMWU+vI2M7Oq8p2xZmYV50RvZlZxXU30Qz0qQdLWkq7O02+X1NP5KBuK81hJT0hakF8f7FKcF0t6XNI9A0yXpC/m7bhb0tROx2hmnde1RN/goxKOB1ZExJ7AOUDHb5Nq4pEOV0fEfvn11Y4GucElwOGDTD8CmJJfJwDndyAmM+uybtboG3lUwkzg0vz+m8BhkurdkNVOI+aRDhHxY+DJQWaZCVwWyW3ADpImdiY6M+uWbib6eo9K2G2geSJiLbASeHFHoqsTQ1YvToC/zc0h35Q0uc70Mmh0W8ysQrqZ6Bt5VEJDj1Nos0Zi+A7QExF/CfyADUchZVOG/6eZdVg3E30jj0pYP4+kscCLGLxpoh2GjDMi/hARz+fBC4H9OxRbs/x4CrNRqJuJvpFHJcwFjsnv3wX8MDp/h9eQcfZr5347sKiD8TVjLnB0vvrmYGBlRCzrdlBm1l7teHplQwZ6VIKkTwF3RsRc4CLg65IWk2ryR5Y0zhMlvR1Ym+M8ttNxAkj6BjAdGC9pCXA6sCVARHwZuBGYASwGngGO60acZtZZfgSCmVnF+c5YM7OKc6I3M6s4J3ozs4pzojczqzgnejOzinOiNzOrOCd6M7OK+/9kivWuWu7JewAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.hist();" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassAgeSibSpParchFare
PassengerId1.000000-0.005007-0.0351440.033207-0.057527-0.0016520.012658
Survived-0.0050071.000000-0.338481-0.069809-0.0353220.0816290.257307
Pclass-0.035144-0.3384811.000000-0.3313390.0830810.018443-0.549500
Age0.033207-0.069809-0.3313391.000000-0.232625-0.1791910.091566
SibSp-0.057527-0.0353220.083081-0.2326251.0000000.4148380.159651
Parch-0.0016520.0816290.018443-0.1791910.4148381.0000000.216225
Fare0.0126580.257307-0.5495000.0915660.1596510.2162251.000000
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass Age SibSp Parch \\\n", + "PassengerId 1.000000 -0.005007 -0.035144 0.033207 -0.057527 -0.001652 \n", + "Survived -0.005007 1.000000 -0.338481 -0.069809 -0.035322 0.081629 \n", + "Pclass -0.035144 -0.338481 1.000000 -0.331339 0.083081 0.018443 \n", + "Age 0.033207 -0.069809 -0.331339 1.000000 -0.232625 -0.179191 \n", + "SibSp -0.057527 -0.035322 0.083081 -0.232625 1.000000 0.414838 \n", + "Parch -0.001652 0.081629 0.018443 -0.179191 0.414838 1.000000 \n", + "Fare 0.012658 0.257307 -0.549500 0.091566 0.159651 0.216225 \n", + "\n", + " Fare \n", + "PassengerId 0.012658 \n", + "Survived 0.257307 \n", + "Pclass -0.549500 \n", + "Age 0.091566 \n", + "SibSp 0.159651 \n", + "Parch 0.216225 \n", + "Fare 1.000000 " + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [], + "source": [ + "mapping = {'male' : 0, 'female' : 1}" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedAge_Group
0103Braund, Mr. Owen Harris022.010A/5 211717.2500NaNSAdult
1211Cumings, Mrs. John Bradley (Florence Briggs Th...138.010PC 1759971.2833C85CAdult
2313Heikkinen, Miss. Laina126.000STON/O2. 31012827.9250NaNSAdult
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)135.01011380353.1000C123SAdult
4503Allen, Mr. William Henry035.0003734508.0500NaNSAdult
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp Parch \\\n", + "0 Braund, Mr. Owen Harris 0 22.0 1 0 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... 1 38.0 1 0 \n", + "2 Heikkinen, Miss. Laina 1 26.0 0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 35.0 1 0 \n", + "4 Allen, Mr. William Henry 0 35.0 0 0 \n", + "\n", + " Ticket Fare Cabin Embarked Age_Group \n", + "0 A/5 21171 7.2500 NaN S Adult \n", + "1 PC 17599 71.2833 C85 C Adult \n", + "2 STON/O2. 3101282 7.9250 NaN S Adult \n", + "3 113803 53.1000 C123 S Adult \n", + "4 373450 8.0500 NaN S Adult " + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Sex = df.Sex.replace(mapping)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [], + "source": [ + "Age_Group = pd.get_dummies(df.Age_Group)\n", + "df = pd.concat([df, Age_Group], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [], + "source": [ + "Sex = pd.get_dummies(df.Sex)\n", + "df = pd.concat([df, Sex], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 0\n", + "Name: Survived, dtype: int64" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y= df.Survived\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSexAgeSibSpParchChildAdultSeniors01
03022.01001010
11138.01001001
23126.00001001
31135.01001001
43035.00001010
\n", + "
" + ], + "text/plain": [ + " Pclass Sex Age SibSp Parch Child Adult Seniors 0 1\n", + "0 3 0 22.0 1 0 0 1 0 1 0\n", + "1 1 1 38.0 1 0 0 1 0 0 1\n", + "2 3 1 26.0 0 0 0 1 0 0 1\n", + "3 1 1 35.0 1 0 0 1 0 0 1\n", + "4 3 0 35.0 0 0 0 1 0 1 0" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = df.drop(columns= ['Survived','Name','PassengerId','Age_Group','Ticket','Cabin','Fare','Embarked'])\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: Survived R-squared: 0.398
Model: OLS Adj. R-squared: 0.394
Method: Least Squares F-statistic: 83.49
Date: Mon, 23 Jul 2018 Prob (F-statistic): 5.04e-93
Time: 08:36:02 Log-Likelihood: -395.67
No. Observations: 891 AIC: 807.3
Df Residuals: 883 BIC: 845.7
Df Model: 7
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
Pclass -0.1798 0.016 -10.929 0.000 -0.212 -0.147
Sex 0.4919 0.030 16.514 0.000 0.433 0.550
Age -0.0040 0.001 -2.841 0.005 -0.007 -0.001
SibSp -0.0460 0.013 -3.537 0.000 -0.072 -0.020
Parch -0.0248 0.018 -1.367 0.172 -0.060 0.011
Child 0.4141 0.050 8.347 0.000 0.317 0.511
Adult 0.2834 0.038 7.481 0.000 0.209 0.358
Seniors 0.2631 0.124 2.124 0.034 0.020 0.506
0 0.4687 0.059 7.936 0.000 0.353 0.585
1 0.4919 0.030 16.514 0.000 0.433 0.550
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 37.345 Durbin-Watson: 1.938
Prob(Omnibus): 0.000 Jarque-Bera (JB): 41.020
Skew: 0.520 Prob(JB): 1.24e-09
Kurtosis: 3.155 Cond. No. 3.66e+16


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 7.03e-28. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Survived R-squared: 0.398\n", + "Model: OLS Adj. R-squared: 0.394\n", + "Method: Least Squares F-statistic: 83.49\n", + "Date: Mon, 23 Jul 2018 Prob (F-statistic): 5.04e-93\n", + "Time: 08:36:02 Log-Likelihood: -395.67\n", + "No. Observations: 891 AIC: 807.3\n", + "Df Residuals: 883 BIC: 845.7\n", + "Df Model: 7 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "Pclass -0.1798 0.016 -10.929 0.000 -0.212 -0.147\n", + "Sex 0.4919 0.030 16.514 0.000 0.433 0.550\n", + "Age -0.0040 0.001 -2.841 0.005 -0.007 -0.001\n", + "SibSp -0.0460 0.013 -3.537 0.000 -0.072 -0.020\n", + "Parch -0.0248 0.018 -1.367 0.172 -0.060 0.011\n", + "Child 0.4141 0.050 8.347 0.000 0.317 0.511\n", + "Adult 0.2834 0.038 7.481 0.000 0.209 0.358\n", + "Seniors 0.2631 0.124 2.124 0.034 0.020 0.506\n", + "0 0.4687 0.059 7.936 0.000 0.353 0.585\n", + "1 0.4919 0.030 16.514 0.000 0.433 0.550\n", + "==============================================================================\n", + "Omnibus: 37.345 Durbin-Watson: 1.938\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 41.020\n", + "Skew: 0.520 Prob(JB): 1.24e-09\n", + "Kurtosis: 3.155 Cond. No. 3.66e+16\n", + "==============================================================================\n", + "\n", + "Warnings:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "[2] The smallest eigenvalue is 7.03e-28. This might indicate that there are\n", + "strong multicollinearity problems or that the design matrix is singular.\n", + "\"\"\"" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = sm.OLS(y, x)\n", + "results = model.fit()\n", + "results.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [], + "source": [ + "# linear regression on the modeling dataset\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", + " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", + " verbose=0, warm_start=False)" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n", + " 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n", + " 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n", + " 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n", + " 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8169491525423729" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSexAgeSibSpParchChildAdultSeniors01
7093029.6991181101010
4392031.0000000001010
8403020.0000000001010
720216.0000000110001
393114.0000001010001
\n", + "
" + ], + "text/plain": [ + " Pclass Sex Age SibSp Parch Child Adult Seniors 0 1\n", + "709 3 0 29.699118 1 1 0 1 0 1 0\n", + "439 2 0 31.000000 0 0 0 1 0 1 0\n", + "840 3 0 20.000000 0 0 0 1 0 1 0\n", + "720 2 1 6.000000 0 1 1 0 0 0 1\n", + "39 3 1 14.000000 1 0 1 0 0 0 1" + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}