From fe131bd789233671e7362ca41ae2cfd0ed5afa91 Mon Sep 17 00:00:00 2001 From: Mayukh Mitra Date: Fri, 2 Oct 2020 19:50:07 +0530 Subject: [PATCH 1/2] Added Naive Bayes Model --- Titanic_naive_bayes.ipynb | 995 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 995 insertions(+) create mode 100644 Titanic_naive_bayes.ipynb diff --git a/Titanic_naive_bayes.ipynb b/Titanic_naive_bayes.ipynb new file mode 100644 index 0000000..f90ecf9 --- /dev/null +++ b/Titanic_naive_bayes.ipynb @@ -0,0 +1,995 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np ### importing numpy and pandas\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = pd.read_csv(\"E:\\MAYUKH\\\\naive bayes\\\\titanic\\\\titanic.csv\") ### import the csv dataset\n", + "dataset.head() ### looking at the head of the csv dataset..." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SurvivedPclassSexAgeFare
003male22.07.2500
111female38.071.2833
213female26.07.9250
311female35.053.1000
403male35.08.0500
\n", + "
" + ], + "text/plain": [ + " Survived Pclass Sex Age Fare\n", + "0 0 3 male 22.0 7.2500\n", + "1 1 1 female 38.0 71.2833\n", + "2 1 3 female 26.0 7.9250\n", + "3 1 1 female 35.0 53.1000\n", + "4 0 3 male 35.0 8.0500" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset.drop(['PassengerId','Name','SibSp','Parch','Ticket','Cabin','Embarked'],axis='columns',inplace=True) ###Dropping the unwanted columns...\n", + "dataset.head() ### After dropping the unwanted columns..." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "target=dataset.Survived ### Fixing Survived columns as target to predict the survivals...\n", + "inputs=dataset.drop('Survived',axis='columns') ### Columns other than \"Survived\" are taken as input..." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
femalemale
001
110
210
310
401
\n", + "
" + ], + "text/plain": [ + " female male\n", + "0 0 1\n", + "1 1 0\n", + "2 1 0\n", + "3 1 0\n", + "4 0 1" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dummies=pd.get_dummies(inputs.Sex) ### Converting the Sex column to integer type from text...\n", + "dummies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSexAgeFarefemalemale
03male22.07.250001
11female38.071.283310
23female26.07.925010
31female35.053.100010
43male35.08.050001
\n", + "
" + ], + "text/plain": [ + " Pclass Sex Age Fare female male\n", + "0 3 male 22.0 7.2500 0 1\n", + "1 1 female 38.0 71.2833 1 0\n", + "2 3 female 26.0 7.9250 1 0\n", + "3 1 female 35.0 53.1000 1 0\n", + "4 3 male 35.0 8.0500 0 1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inputs = pd.concat([inputs,dummies],axis='columns') ### Appending the dummy columns replacing Sex columns in inputs...\n", + "inputs.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassAgeFarefemalemale
0322.07.250001
1138.071.283310
2326.07.925010
3135.053.100010
4335.08.050001
\n", + "
" + ], + "text/plain": [ + " Pclass Age Fare female male\n", + "0 3 22.0 7.2500 0 1\n", + "1 1 38.0 71.2833 1 0\n", + "2 3 26.0 7.9250 1 0\n", + "3 1 35.0 53.1000 1 0\n", + "4 3 35.0 8.0500 0 1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inputs.drop('Sex',axis='columns',inplace=True) ### Dropping the Sex column...\n", + "inputs.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Age'], dtype='object')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inputs.columns[inputs.isna().any()] ### Searching for any NaN value in any column...found in Age column..." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 22.0\n", + "1 38.0\n", + "2 26.0\n", + "3 35.0\n", + "4 35.0\n", + "5 NaN\n", + "6 54.0\n", + "7 2.0\n", + "8 27.0\n", + "9 14.0\n", + "Name: Age, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inputs.Age[:10] ###Looking for the NaN values in Age column..." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 22.000000\n", + "1 38.000000\n", + "2 26.000000\n", + "3 35.000000\n", + "4 35.000000\n", + "5 29.699118\n", + "6 54.000000\n", + "7 2.000000\n", + "8 27.000000\n", + "9 14.000000\n", + "Name: Age, dtype: float64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inputs.Age = inputs.Age.fillna(inputs.Age.mean()) ### Filling the NaN values with the mean of the Age column... 
\n", + "inputs.Age[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split ### importing the required split method from sklearn.model_selection package...\n", + "X_train, X_test, y_train, y_test = train_test_split(inputs,target,test_size=0.2) ### Splitting the dataset in 80-20 ratio..." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "712" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(X_train) ### Checking the length of the X_component of training dataset..." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "179" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(X_test) ### Checking the length of the X_component of the testing dataset..." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "891" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(inputs) ### Length of the total dataset..." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.naive_bayes import GaussianNB ### importing the GaussianNB method from sklearn.naive_bayes package...\n", + "model = GaussianNB() ### Creating a naive_bayes model..." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "GaussianNB()" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(X_train,y_train) ### Running the model on training dataset..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7877094972067039" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(X_test,y_test) ### Accuracy of the model on the testing dataset..." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassAgeFarefemalemale
73326.014.454201
624321.016.100001
80322.09.000001
520130.093.500010
635228.013.000010
252162.026.550001
632132.030.500001
890332.07.750001
217242.027.000001
744331.07.925001
\n", + "
" + ], + "text/plain": [ + " Pclass Age Fare female male\n", + "73 3 26.0 14.4542 0 1\n", + "624 3 21.0 16.1000 0 1\n", + "80 3 22.0 9.0000 0 1\n", + "520 1 30.0 93.5000 1 0\n", + "635 2 28.0 13.0000 1 0\n", + "252 1 62.0 26.5500 0 1\n", + "632 1 32.0 30.5000 0 1\n", + "890 3 32.0 7.7500 0 1\n", + "217 2 42.0 27.0000 0 1\n", + "744 3 31.0 7.9250 0 1" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test[:10] ### First 10 tuples of X_component of testing dataset..." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "73 0\n", + "624 0\n", + "80 0\n", + "520 1\n", + "635 1\n", + "252 0\n", + "632 1\n", + "890 0\n", + "217 0\n", + "744 1\n", + "Name: Survived, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test[:10] ### First 10 tuples of the y_component of the testing dataset...the target dataset..." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 1, 0, 0, 0, 0, 0], dtype=int64)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(X_test[:10]) ### Predicting the first 10 tuples..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[9.89072891e-01, 1.09271086e-02],\n", + " [9.88295686e-01, 1.17043139e-02],\n", + " [9.88350452e-01, 1.16495484e-02],\n", + " [8.66667323e-04, 9.99133333e-01],\n", + " [2.96605891e-02, 9.70339411e-01],\n", + " [9.17730305e-01, 8.22696952e-02],\n", + " [9.07417008e-01, 9.25829924e-02],\n", + " [9.89612226e-01, 1.03877742e-02],\n", + " [9.77848958e-01, 2.21510417e-02],\n", + " [9.89516619e-01, 1.04833814e-02]])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict_proba(X_test[:10]) ### Predicting the class probabilities for first 10 tuples..." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From aa1175f034e0c80c2c3f2184c57e22355cbaea12 Mon Sep 17 00:00:00 2001 From: Mayukh Mitra Date: Sun, 4 Oct 2020 02:40:09 +0530 Subject: [PATCH 2/2] Added Titanic model --- ...ynb => Titanic_Missing_Data_Handling.ipynb | 332 ++---------------- 1 file changed, 20 insertions(+), 312 deletions(-) rename Titanic_naive_bayes.ipynb => Titanic_Missing_Data_Handling.ipynb (70%) diff --git a/Titanic_naive_bayes.ipynb b/Titanic_Missing_Data_Handling.ipynb similarity index 70% rename from Titanic_naive_bayes.ipynb rename to Titanic_Missing_Data_Handling.ipynb index f90ecf9..823c727 100644 --- a/Titanic_naive_bayes.ipynb +++ b/Titanic_Missing_Data_Handling.ipynb @@ -553,6 +553,15 @@ "cell_type": "code", "execution_count": 8, "metadata": {}, + "outputs": [], + "source": [ + "### Looking for missing data in any of 
the attributes in the data..." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, "outputs": [ { "data": { @@ -560,7 +569,7 @@ "Index(['Age'], dtype='object')" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -571,7 +580,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -590,7 +599,7 @@ "Name: Age, dtype: float64" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -601,7 +610,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -620,7 +629,7 @@ "Name: Age, dtype: float64" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -630,34 +639,14 @@ "inputs.Age[:10]" ] }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import train_test_split ### importing the required split method from sklearn.model_selection package...\n", - "X_train, X_test, y_train, y_test = train_test_split(inputs,target,test_size=0.2) ### Splitting the dataset in 80-20 ratio..." - ] - }, { "cell_type": "code", "execution_count": 12, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "712" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "len(X_train) ### Checking the length of the X_component of training dataset..." + "### After replacing the missing values with the mean value...\n", + "### Again checking for any missing data in any of the attributes..." ] }, { @@ -668,7 +657,7 @@ { "data": { "text/plain": [ - "179" + "Index([], dtype='object')" ] }, "execution_count": 13, @@ -677,297 +666,16 @@ } ], "source": [ - "len(X_test) ### Checking the length of the X_component of the testing dataset..." 
+ "inputs.columns[inputs.isna().any()]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "891" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(inputs) ### Length of the total dataset..." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, "outputs": [], "source": [ - "from sklearn.naive_bayes import GaussianNB ### importing the GaussianNB method from sklearn.naive_bayes package...\n", - "model = GaussianNB() ### Creating a naive_bayes model..." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "GaussianNB()" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.fit(X_train,y_train) ### Running the model on training dataset..." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7877094972067039" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.score(X_test,y_test) ### Accuracy of the model on the testing dataset..." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PclassAgeFarefemalemale
73326.014.454201
624321.016.100001
80322.09.000001
520130.093.500010
635228.013.000010
252162.026.550001
632132.030.500001
890332.07.750001
217242.027.000001
744331.07.925001
\n", - "
" - ], - "text/plain": [ - " Pclass Age Fare female male\n", - "73 3 26.0 14.4542 0 1\n", - "624 3 21.0 16.1000 0 1\n", - "80 3 22.0 9.0000 0 1\n", - "520 1 30.0 93.5000 1 0\n", - "635 2 28.0 13.0000 1 0\n", - "252 1 62.0 26.5500 0 1\n", - "632 1 32.0 30.5000 0 1\n", - "890 3 32.0 7.7500 0 1\n", - "217 2 42.0 27.0000 0 1\n", - "744 3 31.0 7.9250 0 1" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X_test[:10] ### First 10 tuples of X_component of testing dataset..." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "73 0\n", - "624 0\n", - "80 0\n", - "520 1\n", - "635 1\n", - "252 0\n", - "632 1\n", - "890 0\n", - "217 0\n", - "744 1\n", - "Name: Survived, dtype: int64" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_test[:10] ### First 10 tuples of the y_component of the testing dataset...the target dataset..." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0, 0, 0, 1, 1, 0, 0, 0, 0, 0], dtype=int64)" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.predict(X_test[:10]) ### Predicting the first 10 tuples..." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[9.89072891e-01, 1.09271086e-02],\n", - " [9.88295686e-01, 1.17043139e-02],\n", - " [9.88350452e-01, 1.16495484e-02],\n", - " [8.66667323e-04, 9.99133333e-01],\n", - " [2.96605891e-02, 9.70339411e-01],\n", - " [9.17730305e-01, 8.22696952e-02],\n", - " [9.07417008e-01, 9.25829924e-02],\n", - " [9.89612226e-01, 1.03877742e-02],\n", - " [9.77848958e-01, 2.21510417e-02],\n", - " [9.89516619e-01, 1.04833814e-02]])" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.predict_proba(X_test[:10]) ### Predicting the class probabilities for first 10 tuples..." + "### So there are no more missing values in any of the attributes in the data..." ] } ],