From 75399f6aefaab78380e7d33a3d0f9a50fa664658 Mon Sep 17 00:00:00 2001 From: Bessie Garrick <bessie.garrick@mckesson.com> Date: Fri, 20 Jul 2018 15:28:14 -0500 Subject: [PATCH] Bessie Titanic complete --- .../Titanic-answer-checkpoint.ipynb | 821 ++++++++++++++++++ Titanic-answer.ipynb | 821 ++++++++++++++++++ 2 files changed, 1642 insertions(+) create mode 100644 .ipynb_checkpoints/Titanic-answer-checkpoint.ipynb create mode 100644 Titanic-answer.ipynb diff --git a/.ipynb_checkpoints/Titanic-answer-checkpoint.ipynb b/.ipynb_checkpoints/Titanic-answer-checkpoint.ipynb new file mode 100644 index 0000000..533e7db --- /dev/null +++ b/.ipynb_checkpoints/Titanic-answer-checkpoint.ipynb @@ -0,0 +1,821 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 339, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import r2_score\n", + "from pandas.plotting import scatter_matrix\n", + "from sklearn.linear_model import LogisticRegression\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 340, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Braund, Mr. Owen Harris</td>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5 21171</td>\n", + " <td>7.2500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Allen, Mr. William Henry</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>373450</td>\n", + " <td>8.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 340, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('train.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 341, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.6+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 342, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 0\n", + "Name: Survived, dtype: int64" + ] + }, + "execution_count": 342, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y = df.Survived\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 343, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Braund, Mr. Owen Harris</td>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5 21171</td>\n", + " <td>7.2500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Allen, Mr. William Henry</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>373450</td>\n", + " <td>8.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 343, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.Age.isna()]\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 344, + "metadata": {}, + "outputs": [], + "source": [ + "Age_mean = df.Age.mean()\n", + "df['Age'] = df.Age.fillna(Age_mean)" + ] + }, + { + "cell_type": "code", + "execution_count": 345, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 891 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.6+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 361, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 6 columns):\n", + "Pclass 891 non-null int64\n", + "Sex 891 non-null object\n", + "Age 891 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Fare 891 non-null float64\n", + "dtypes: float64(2), int64(3), object(1)\n", + "memory usage: 41.8+ KB\n" + ] + } + ], + "source": [ + "x = df.drop(columns=['Survived', 'Cabin', 'PassengerId', 'Name','Embarked', 'Ticket'])\n", + "x.head()\n", + "x.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 362, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Pclass</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Fare</th>\n", + " <th>Sex_female</th>\n", + " <th>Sex_male</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>3</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>7.2500</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>71.2833</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>7.9250</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>53.1000</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>3</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8.0500</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Pclass Age SibSp Parch Fare Sex_female Sex_male\n", + "0 3 22.0 1 0 7.2500 0 1\n", + "1 1 38.0 1 0 71.2833 1 0\n", + "2 3 26.0 0 0 7.9250 1 0\n", + "3 1 35.0 1 0 53.1000 1 0\n", + "4 3 35.0 0 0 8.0500 0 1" + ] + }, + "execution_count": 362, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = pd.get_dummies(x)\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 363, + "metadata": {}, + "outputs": [], + "source": [ + "x = x.drop(columns=['Sex_male'])" + ] + }, + { + "cell_type": "code", + "execution_count": 364, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((596, 6), (295, 6))" + ] + }, + "execution_count": 364, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)\n", + "x_train.shape, x_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 365, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Pclass</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Fare</th>\n", + " <th>Sex_female</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Pclass</th>\n", + " <td>1.000000</td>\n", + " <td>-0.331339</td>\n", + " <td>0.083081</td>\n", + " <td>0.018443</td>\n", + " <td>-0.549500</td>\n", + " <td>-0.131900</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Age</th>\n", + " <td>-0.331339</td>\n", + " <td>1.000000</td>\n", + " <td>-0.232625</td>\n", + " <td>-0.179191</td>\n", + " <td>0.091566</td>\n", + " <td>-0.084153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>SibSp</th>\n", + " <td>0.083081</td>\n", + " <td>-0.232625</td>\n", + " <td>1.000000</td>\n", + " <td>0.414838</td>\n", + " <td>0.159651</td>\n", + " <td>0.114631</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Parch</th>\n", + " <td>0.018443</td>\n", + " <td>-0.179191</td>\n", + " <td>0.414838</td>\n", + " <td>1.000000</td>\n", + " <td>0.216225</td>\n", + " <td>0.245489</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Fare</th>\n", + " <td>-0.549500</td>\n", + " <td>0.091566</td>\n", + " <td>0.159651</td>\n", + " <td>0.216225</td>\n", + " <td>1.000000</td>\n", + " <td>0.182333</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Sex_female</th>\n", + " <td>-0.131900</td>\n", + " <td>-0.084153</td>\n", + " <td>0.114631</td>\n", + " <td>0.245489</td>\n", + " <td>0.182333</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Pclass Age SibSp Parch Fare Sex_female\n", + "Pclass 1.000000 -0.331339 0.083081 0.018443 -0.549500 -0.131900\n", + "Age -0.331339 1.000000 -0.232625 -0.179191 0.091566 -0.084153\n", + "SibSp 0.083081 -0.232625 1.000000 0.414838 0.159651 0.114631\n", + "Parch 0.018443 -0.179191 0.414838 1.000000 0.216225 0.245489\n", + "Fare -0.549500 0.091566 0.159651 0.216225 1.000000 0.182333\n", + "Sex_female -0.131900 -0.084153 0.114631 0.245489 0.182333 1.000000" + ] + }, + "execution_count": 365, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 366, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", + " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", + " verbose=0, warm_start=False)" + ] + }, + "execution_count": 366, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 367, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0,\n", + " 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n", + " 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n", + " 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n", + " 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)" + ] + }, + "execution_count": 367, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 368, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8101694915254237" + ] + }, + "execution_count": 368, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Titanic-answer.ipynb b/Titanic-answer.ipynb new file mode 100644 index 0000000..533e7db --- /dev/null +++ b/Titanic-answer.ipynb @@ -0,0 +1,821 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 339, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import r2_score\n", + "from pandas.plotting import scatter_matrix\n", + "from sklearn.linear_model import LogisticRegression\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 340, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Braund, Mr. Owen Harris</td>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5 21171</td>\n", + " <td>7.2500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Allen, Mr. William Henry</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>373450</td>\n", + " <td>8.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 340, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('train.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 341, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.6+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 342, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 0\n", + "Name: Survived, dtype: int64" + ] + }, + "execution_count": 342, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y = df.Survived\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 343, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Braund, Mr. Owen Harris</td>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5 21171</td>\n", + " <td>7.2500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Allen, Mr. William Henry</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>373450</td>\n", + " <td>8.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 343, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.Age.isna()]\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 344, + "metadata": {}, + "outputs": [], + "source": [ + "Age_mean = df.Age.mean()\n", + "df['Age'] = df.Age.fillna(Age_mean)" + ] + }, + { + "cell_type": "code", + "execution_count": 345, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 891 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.6+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 361, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 6 columns):\n", + "Pclass 891 non-null int64\n", + "Sex 891 non-null object\n", + "Age 891 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Fare 891 non-null float64\n", + "dtypes: float64(2), int64(3), object(1)\n", + "memory usage: 41.8+ KB\n" + ] + } + ], + "source": [ + "x = df.drop(columns=['Survived', 'Cabin', 'PassengerId', 'Name','Embarked', 'Ticket'])\n", + "x.head()\n", + "x.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 362, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Pclass</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Fare</th>\n", + " <th>Sex_female</th>\n", + " <th>Sex_male</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>3</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>7.2500</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>71.2833</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>7.9250</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>53.1000</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>3</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8.0500</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Pclass Age SibSp Parch Fare Sex_female Sex_male\n", + "0 3 22.0 1 0 7.2500 0 1\n", + "1 1 38.0 1 0 71.2833 1 0\n", + "2 3 26.0 0 0 7.9250 1 0\n", + "3 1 35.0 1 0 53.1000 1 0\n", + "4 3 35.0 0 0 8.0500 0 1" + ] + }, + "execution_count": 362, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = pd.get_dummies(x)\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 363, + "metadata": {}, + "outputs": [], + "source": [ + "x = x.drop(columns=['Sex_male'])" + ] + }, + { + "cell_type": "code", + "execution_count": 364, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((596, 6), (295, 6))" + ] + }, + "execution_count": 364, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)\n", + "x_train.shape, x_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 365, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Pclass</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Fare</th>\n", + " <th>Sex_female</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Pclass</th>\n", + " <td>1.000000</td>\n", + " <td>-0.331339</td>\n", + " <td>0.083081</td>\n", + " <td>0.018443</td>\n", + " <td>-0.549500</td>\n", + " <td>-0.131900</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Age</th>\n", + " <td>-0.331339</td>\n", + " <td>1.000000</td>\n", + " <td>-0.232625</td>\n", + " <td>-0.179191</td>\n", + " <td>0.091566</td>\n", + " <td>-0.084153</td>\n", + " </tr>\n", + " <tr>\n", + " <th>SibSp</th>\n", + " <td>0.083081</td>\n", + " <td>-0.232625</td>\n", + " <td>1.000000</td>\n", + " <td>0.414838</td>\n", + " <td>0.159651</td>\n", + " <td>0.114631</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Parch</th>\n", + " <td>0.018443</td>\n", + " <td>-0.179191</td>\n", + " <td>0.414838</td>\n", + " <td>1.000000</td>\n", + " <td>0.216225</td>\n", + " <td>0.245489</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Fare</th>\n", + " <td>-0.549500</td>\n", + " <td>0.091566</td>\n", + " <td>0.159651</td>\n", + " <td>0.216225</td>\n", + " <td>1.000000</td>\n", + " <td>0.182333</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Sex_female</th>\n", + " <td>-0.131900</td>\n", + " <td>-0.084153</td>\n", + " <td>0.114631</td>\n", + " <td>0.245489</td>\n", + " <td>0.182333</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Pclass Age SibSp Parch Fare Sex_female\n", + "Pclass 1.000000 -0.331339 0.083081 0.018443 -0.549500 -0.131900\n", + "Age -0.331339 1.000000 -0.232625 -0.179191 0.091566 -0.084153\n", + "SibSp 0.083081 -0.232625 1.000000 0.414838 0.159651 0.114631\n", + "Parch 0.018443 -0.179191 0.414838 1.000000 0.216225 0.245489\n", + "Fare -0.549500 0.091566 0.159651 0.216225 1.000000 0.182333\n", + "Sex_female -0.131900 -0.084153 0.114631 0.245489 0.182333 1.000000" + ] + }, + "execution_count": 365, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 366, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", + " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", + " verbose=0, warm_start=False)" + ] + }, + "execution_count": 366, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 367, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0,\n", + " 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n", + " 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n", + " 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n", + " 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0], dtype=int64)" + ] + }, + "execution_count": 367, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 368, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8101694915254237" + ] + }, + "execution_count": 368, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}