From fe131bd789233671e7362ca41ae2cfd0ed5afa91 Mon Sep 17 00:00:00 2001
From: Mayukh Mitra <mayukhmitra941@gmail.com>
Date: Fri, 2 Oct 2020 19:50:07 +0530
Subject: [PATCH 1/2] Added Naive Bayes Model

---
 Titanic_naive_bayes.ipynb | 995 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 995 insertions(+)
 create mode 100644 Titanic_naive_bayes.ipynb
diff --git a/Titanic_naive_bayes.ipynb b/Titanic_naive_bayes.ipynb
new file mode 100644
index 0000000..f90ecf9
--- /dev/null
+++ b/Titanic_naive_bayes.ipynb
@@ -0,0 +1,995 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np       ### importing numpy and pandas\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>PassengerId</th>\n",
+       "      <th>Survived</th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Sex</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Ticket</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Cabin</th>\n",
+       "      <th>Embarked</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Braund, Mr. Owen Harris</td>\n",
+       "      <td>male</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>A/5 21171</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
+       "      <td>female</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>PC 17599</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>C85</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Heikkinen, Miss. Laina</td>\n",
+       "      <td>female</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>STON/O2. 3101282</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
+       "      <td>female</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>113803</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>C123</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Allen, Mr. William Henry</td>\n",
+       "      <td>male</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>373450</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   PassengerId  Survived  Pclass  \\\n",
+       "0            1         0       3   \n",
+       "1            2         1       1   \n",
+       "2            3         1       3   \n",
+       "3            4         1       1   \n",
+       "4            5         0       3   \n",
+       "\n",
+       "                                                Name     Sex   Age  SibSp  \\\n",
+       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
+       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
+       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
+       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
+       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
+       "\n",
+       "   Parch            Ticket     Fare Cabin Embarked  \n",
+       "0      0         A/5 21171   7.2500   NaN        S  \n",
+       "1      0          PC 17599  71.2833   C85        C  \n",
+       "2      0  STON/O2. 3101282   7.9250   NaN        S  \n",
+       "3      0            113803  53.1000  C123        S  \n",
+       "4      0            373450   8.0500   NaN        S  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset = pd.read_csv(\"E:\\MAYUKH\\\\naive bayes\\\\titanic\\\\titanic.csv\")  ### import the csv dataset\n",
+    "dataset.head()          ### looking at the head of the csv dataset..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Survived</th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Sex</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>Fare</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>7.2500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>71.2833</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>female</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>7.9250</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>53.1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>8.0500</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Survived  Pclass     Sex   Age     Fare\n",
+       "0         0       3    male  22.0   7.2500\n",
+       "1         1       1  female  38.0  71.2833\n",
+       "2         1       3  female  26.0   7.9250\n",
+       "3         1       1  female  35.0  53.1000\n",
+       "4         0       3    male  35.0   8.0500"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset.drop(['PassengerId','Name','SibSp','Parch','Ticket','Cabin','Embarked'],axis='columns',inplace=True) ###Dropping the unwanted columns...\n",
+    "dataset.head()  ### After dropping the unwanted columns..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target=dataset.Survived                 ### Using the Survived column as the target to predict survival...\n",
+    "inputs=dataset.drop('Survived',axis='columns') ### Columns other than \"Survived\" are taken as input..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>female</th>\n",
+       "      <th>male</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   female  male\n",
+       "0       0     1\n",
+       "1       1     0\n",
+       "2       1     0\n",
+       "3       1     0\n",
+       "4       0     1"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dummies=pd.get_dummies(inputs.Sex) ### One-hot encoding the text Sex column into 0/1 'female' and 'male' indicator columns...\n",
+    "dummies.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Sex</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>female</th>\n",
+       "      <th>male</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>female</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Pclass     Sex   Age     Fare  female  male\n",
+       "0       3    male  22.0   7.2500       0     1\n",
+       "1       1  female  38.0  71.2833       1     0\n",
+       "2       3  female  26.0   7.9250       1     0\n",
+       "3       1  female  35.0  53.1000       1     0\n",
+       "4       3    male  35.0   8.0500       0     1"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "inputs = pd.concat([inputs,dummies],axis='columns') ### Appending the dummy columns to inputs (the original Sex column is dropped in the next cell)...\n",
+    "inputs.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>female</th>\n",
+       "      <th>male</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>3</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Pclass   Age     Fare  female  male\n",
+       "0       3  22.0   7.2500       0     1\n",
+       "1       1  38.0  71.2833       1     0\n",
+       "2       3  26.0   7.9250       1     0\n",
+       "3       1  35.0  53.1000       1     0\n",
+       "4       3  35.0   8.0500       0     1"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "inputs.drop('Sex',axis='columns',inplace=True) ### Dropping the Sex column...\n",
+    "inputs.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Age'], dtype='object')"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "inputs.columns[inputs.isna().any()] ### Searching for any NaN value in any column...found in Age column..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    22.0\n",
+       "1    38.0\n",
+       "2    26.0\n",
+       "3    35.0\n",
+       "4    35.0\n",
+       "5     NaN\n",
+       "6    54.0\n",
+       "7     2.0\n",
+       "8    27.0\n",
+       "9    14.0\n",
+       "Name: Age, dtype: float64"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "inputs.Age[:10] ###Looking for the NaN values in Age column..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    22.000000\n",
+       "1    38.000000\n",
+       "2    26.000000\n",
+       "3    35.000000\n",
+       "4    35.000000\n",
+       "5    29.699118\n",
+       "6    54.000000\n",
+       "7     2.000000\n",
+       "8    27.000000\n",
+       "9    14.000000\n",
+       "Name: Age, dtype: float64"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "inputs.Age = inputs.Age.fillna(inputs.Age.mean()) ### Filling the NaN values with the mean of the Age column...  \n",
+    "inputs.Age[:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split  ### importing the required split method from sklearn.model_selection package...\n",
+    "X_train, X_test, y_train, y_test = train_test_split(inputs,target,test_size=0.2) ### Splitting the dataset in 80-20 ratio..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "712"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(X_train) ### Checking the length of the X_component of training dataset..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "179"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(X_test) ### Checking the length of the X_component of the testing dataset..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "891"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(inputs) ### Length of the total dataset..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.naive_bayes import GaussianNB ### importing the GaussianNB method from sklearn.naive_bayes package...\n",
+    "model = GaussianNB()  ### Creating a naive_bayes model..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "GaussianNB()"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.fit(X_train,y_train) ### Running the model on training dataset..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7877094972067039"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.score(X_test,y_test)  ### Accuracy of the model on the testing dataset..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>female</th>\n",
+       "      <th>male</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>73</th>\n",
+       "      <td>3</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>14.4542</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>624</th>\n",
+       "      <td>3</td>\n",
+       "      <td>21.0</td>\n",
+       "      <td>16.1000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>80</th>\n",
+       "      <td>3</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>9.0000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>520</th>\n",
+       "      <td>1</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>93.5000</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>635</th>\n",
+       "      <td>2</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>13.0000</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>252</th>\n",
+       "      <td>1</td>\n",
+       "      <td>62.0</td>\n",
+       "      <td>26.5500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>632</th>\n",
+       "      <td>1</td>\n",
+       "      <td>32.0</td>\n",
+       "      <td>30.5000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>890</th>\n",
+       "      <td>3</td>\n",
+       "      <td>32.0</td>\n",
+       "      <td>7.7500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>217</th>\n",
+       "      <td>2</td>\n",
+       "      <td>42.0</td>\n",
+       "      <td>27.0000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>744</th>\n",
+       "      <td>3</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     Pclass   Age     Fare  female  male\n",
+       "73        3  26.0  14.4542       0     1\n",
+       "624       3  21.0  16.1000       0     1\n",
+       "80        3  22.0   9.0000       0     1\n",
+       "520       1  30.0  93.5000       1     0\n",
+       "635       2  28.0  13.0000       1     0\n",
+       "252       1  62.0  26.5500       0     1\n",
+       "632       1  32.0  30.5000       0     1\n",
+       "890       3  32.0   7.7500       0     1\n",
+       "217       2  42.0  27.0000       0     1\n",
+       "744       3  31.0   7.9250       0     1"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_test[:10]  ### First 10 tuples of X_component of testing dataset..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "73     0\n",
+       "624    0\n",
+       "80     0\n",
+       "520    1\n",
+       "635    1\n",
+       "252    0\n",
+       "632    1\n",
+       "890    0\n",
+       "217    0\n",
+       "744    1\n",
+       "Name: Survived, dtype: int64"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_test[:10]  ### First 10 tuples of the y_component of the testing dataset...the target dataset..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0, 0, 0, 1, 1, 0, 0, 0, 0, 0], dtype=int64)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.predict(X_test[:10])  ### Predicting the first 10 tuples..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[9.89072891e-01, 1.09271086e-02],\n",
+       "       [9.88295686e-01, 1.17043139e-02],\n",
+       "       [9.88350452e-01, 1.16495484e-02],\n",
+       "       [8.66667323e-04, 9.99133333e-01],\n",
+       "       [2.96605891e-02, 9.70339411e-01],\n",
+       "       [9.17730305e-01, 8.22696952e-02],\n",
+       "       [9.07417008e-01, 9.25829924e-02],\n",
+       "       [9.89612226e-01, 1.03877742e-02],\n",
+       "       [9.77848958e-01, 2.21510417e-02],\n",
+       "       [9.89516619e-01, 1.04833814e-02]])"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.predict_proba(X_test[:10]) ### Predicting the class probabilities for first 10 tuples..."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From aa1175f034e0c80c2c3f2184c57e22355cbaea12 Mon Sep 17 00:00:00 2001
From: Mayukh Mitra <mayukhmitra941@gmail.com>
Date: Sun, 4 Oct 2020 02:40:09 +0530
Subject: [PATCH 2/2] Added Titanic model

---
 ...ynb => Titanic_Missing_Data_Handling.ipynb | 332 ++----------------
 1 file changed, 20 insertions(+), 312 deletions(-)
 rename Titanic_naive_bayes.ipynb => Titanic_Missing_Data_Handling.ipynb (70%)

diff --git a/Titanic_naive_bayes.ipynb b/Titanic_Missing_Data_Handling.ipynb
similarity index 70%
rename from Titanic_naive_bayes.ipynb
rename to Titanic_Missing_Data_Handling.ipynb
index f90ecf9..823c727 100644
--- a/Titanic_naive_bayes.ipynb
+++ b/Titanic_Missing_Data_Handling.ipynb
@@ -553,6 +553,15 @@
    "cell_type": "code",
    "execution_count": 8,
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "### Looking for missing data in any of the attributes in the data..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -560,7 +569,7 @@
        "Index(['Age'], dtype='object')"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -571,7 +580,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -590,7 +599,7 @@
        "Name: Age, dtype: float64"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -601,7 +610,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -620,7 +629,7 @@
        "Name: Age, dtype: float64"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -630,34 +639,14 @@
     "inputs.Age[:10]"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.model_selection import train_test_split  ### importing the required split method from sklearn.model_selection package...\n",
-    "X_train, X_test, y_train, y_test = train_test_split(inputs,target,test_size=0.2) ### Splitting the dataset in 80-20 ratio..."
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 12,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "712"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "len(X_train) ### Checking the length of the X_component of training dataset..."
+    "### After replacing the missing values with the mean value...\n",
+    "### Again checking for any missing data in any of the attributes..."
    ]
   },
   {
@@ -668,7 +657,7 @@
     {
      "data": {
       "text/plain": [
-       "179"
+       "Index([], dtype='object')"
       ]
      },
      "execution_count": 13,
@@ -677,297 +666,16 @@
     }
    ],
    "source": [
-    "len(X_test) ### Checking the length of the X_component of the testing dataset..."
+    "inputs.columns[inputs.isna().any()]"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 14,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "891"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(inputs) ### Length of the total dataset..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.naive_bayes import GaussianNB ### importing the GaussianNB method from sklearn.naive_bayes package...\n",
-    "model = GaussianNB()  ### Creating a naive_bayes model..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "GaussianNB()"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.fit(X_train,y_train) ### Running the model on training dataset..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.7877094972067039"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.score(X_test,y_test)  ### Accuracy of the model on the testing dataset..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Pclass</th>\n",
-       "      <th>Age</th>\n",
-       "      <th>Fare</th>\n",
-       "      <th>female</th>\n",
-       "      <th>male</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>73</th>\n",
-       "      <td>3</td>\n",
-       "      <td>26.0</td>\n",
-       "      <td>14.4542</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>624</th>\n",
-       "      <td>3</td>\n",
-       "      <td>21.0</td>\n",
-       "      <td>16.1000</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>80</th>\n",
-       "      <td>3</td>\n",
-       "      <td>22.0</td>\n",
-       "      <td>9.0000</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>520</th>\n",
-       "      <td>1</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>93.5000</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>635</th>\n",
-       "      <td>2</td>\n",
-       "      <td>28.0</td>\n",
-       "      <td>13.0000</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>252</th>\n",
-       "      <td>1</td>\n",
-       "      <td>62.0</td>\n",
-       "      <td>26.5500</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>632</th>\n",
-       "      <td>1</td>\n",
-       "      <td>32.0</td>\n",
-       "      <td>30.5000</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>890</th>\n",
-       "      <td>3</td>\n",
-       "      <td>32.0</td>\n",
-       "      <td>7.7500</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>217</th>\n",
-       "      <td>2</td>\n",
-       "      <td>42.0</td>\n",
-       "      <td>27.0000</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>744</th>\n",
-       "      <td>3</td>\n",
-       "      <td>31.0</td>\n",
-       "      <td>7.9250</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "     Pclass   Age     Fare  female  male\n",
-       "73        3  26.0  14.4542       0     1\n",
-       "624       3  21.0  16.1000       0     1\n",
-       "80        3  22.0   9.0000       0     1\n",
-       "520       1  30.0  93.5000       1     0\n",
-       "635       2  28.0  13.0000       1     0\n",
-       "252       1  62.0  26.5500       0     1\n",
-       "632       1  32.0  30.5000       0     1\n",
-       "890       3  32.0   7.7500       0     1\n",
-       "217       2  42.0  27.0000       0     1\n",
-       "744       3  31.0   7.9250       0     1"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "X_test[:10]  ### First 10 tuples of X_component of testing dataset..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "73     0\n",
-       "624    0\n",
-       "80     0\n",
-       "520    1\n",
-       "635    1\n",
-       "252    0\n",
-       "632    1\n",
-       "890    0\n",
-       "217    0\n",
-       "744    1\n",
-       "Name: Survived, dtype: int64"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "y_test[:10]  ### First 10 tuples of the y_component of the testing dataset...the target dataset..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([0, 0, 0, 1, 1, 0, 0, 0, 0, 0], dtype=int64)"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.predict(X_test[:10])  ### Predicting the first 10 tuples..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([[9.89072891e-01, 1.09271086e-02],\n",
-       "       [9.88295686e-01, 1.17043139e-02],\n",
-       "       [9.88350452e-01, 1.16495484e-02],\n",
-       "       [8.66667323e-04, 9.99133333e-01],\n",
-       "       [2.96605891e-02, 9.70339411e-01],\n",
-       "       [9.17730305e-01, 8.22696952e-02],\n",
-       "       [9.07417008e-01, 9.25829924e-02],\n",
-       "       [9.89612226e-01, 1.03877742e-02],\n",
-       "       [9.77848958e-01, 2.21510417e-02],\n",
-       "       [9.89516619e-01, 1.04833814e-02]])"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.predict_proba(X_test[:10]) ### Predicting the class probabilities for first 10 tuples..."
+    "### So there are no more missing values in any of the attributes in the data..."
    ]
   }
  ],

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	NaN	S
	Pclass	Age	Fare	female	male
73	3	26.0	14.4542	0	1
624	3	21.0	16.1000	0	1
80	3	22.0	9.0000	0	1
520	1	30.0	93.5000	1	0
635	2	28.0	13.0000	1	0
252	1	62.0	26.5500	0	1
632	1	32.0	30.5000	0	1
890	3	32.0	7.7500	0	1
217	2	42.0	27.0000	0	1
744	3	31.0	7.9250	0	1