From 2864ab81144a4284b7fce3229bfad9b2f899eab5 Mon Sep 17 00:00:00 2001 From: Miel Hostens Date: Tue, 10 Oct 2023 11:58:46 +0200 Subject: [PATCH] Created using Colaboratory --- SensorBolusPeakDetectionAndRFAlgorithm.ipynb | 7172 ++++++++++++++++++ 1 file changed, 7172 insertions(+) create mode 100644 SensorBolusPeakDetectionAndRFAlgorithm.ipynb diff --git a/SensorBolusPeakDetectionAndRFAlgorithm.ipynb b/SensorBolusPeakDetectionAndRFAlgorithm.ipynb new file mode 100644 index 0000000..f02da52 --- /dev/null +++ b/SensorBolusPeakDetectionAndRFAlgorithm.ipynb @@ -0,0 +1,7172 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MROm8RgH0pb9" + }, + "source": [ + "# Notebook for the algorithm\n", + "\n", + "This is a Google Colab Notebook. It has a google machine in the back which will activate the moment you click the play button from the first cell. You can run cells seperately, or all after eachother, it allows you to prototype much easier then using the PyCharm.\n", + "\n", + "The notebook can be shared and editted by multiple people. If you want to work in your own version, you can make a copy of the notebook first." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WQHVFkd_ZcBs" + }, + "source": [ + "#GDrive\n", + "You need to make a link with the data. This is all stored in your google drive , or in the case of this notebook a bovi-analytics folder.\n", + "\n", + "First run the cell underneath, then open the link in the output and copy the code underneath. That way, the Google Drive is connected." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SiNP_qsYZbLs", + "outputId": "aa4f450f-6e4c-411d-9a74-cb0b1dda36e2" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/gdrive\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/gdrive')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dm_jCmj7At96" + }, + "source": [ + "# Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6RPBeG88Xcz1" + }, + "outputs": [], + "source": [ + "#import general libraries\n", + "import os\n", + "import glob\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from datetime import datetime\n", + "from scipy.stats import iqr\n", + "from scipy.stats import mode\n", + "from scipy.signal import welch\n", + "from scipy.fftpack import fft\n", + "from scipy import signal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oeImDZLP78jH" + }, + "outputs": [], + "source": [ + "from sklearn import metrics\n", + "from sklearn.metrics import *\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "#from sklearn.externals import joblib\n", + "from sklearn.utils import resample" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1u1Xv3LxeLBQ" + }, + "source": [ + "# Set directories" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4XEH4X1rYgly" + }, + "outputs": [], + "source": [ + "#define directories\n", + "base_dir = '/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje'\n", + "video_dir = '/data/out/video/'\n", + "bolus_dir = '/data/out/bolus/'\n", + "output_dir= '/data/out/out/'\n", + "video_path = base_dir + video_dir\n", + "bolus_path = base_dir + bolus_dir\n", + "output_path = base_dir + output_dir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aAxIKxdeYnQ2" + }, + "outputs": [], + "source": [ + "#create output directory\n", + "if os.path.isdir ( output_path ):\n", + " pass\n", + "else:\n", + " os.mkdir (output_path )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uBDpWvipbHfB" + }, + "outputs": [], + "source": [ + "#remove old output file\n", + "for f in os.listdir(output_path):\n", + " print('Existing file : ' + f)\n", + " oldfile = output_path + f\n", + " if os.path.isfile(oldfile):\n", + " os.remove(oldfile)\n", + " print('Removed : ' + oldfile)\n", + " else:\n", + " pass\n", + " print('Did not remove : ' + oldfile)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ol9ZN3e9KMKz" + }, + "source": [ + "# PeakDetectionAlgorithm" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "leKIwy_23HoI" + }, + "source": [ + "## Algorithm function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XhjspdZtAPcr" + }, + "outputs": [], + "source": [ + "def picoalgorithm_gain(data, gain_peakdetection = 35,col=0):\n", + " #data window paramters, 0,5s per sample\n", + " start = 1\n", + " Data = 0\n", + " Rumination = 0\n", + " PeakValue = 0\n", + " PressureDetected = 0\n", + " LowTimeCounterHold = 0\n", + "\n", + " #peak detection parameters\n", + " Slope_Rise = 2 * gain_peakdetection;\n", + " Slope_Fall = gain_peakdetection;\n", + " Actual_Rise = float(0)\n", + " Actual_Fall = float(0)\n", + "\n", + " #rumination detection parameters\n", + " State = 1\n", + " Timer_D = 0\n", + " Timer_E = 0\n", + " Timer_F = 0\n", + " Pulse_High_flank = 0\n", + " Rumination_Time = 10 # 0,5s samplerate -> 5s\n", + " D = 10 # 0,5s samplerate -> 5s\n", + " E = 30 # 0,5s samplerate -> 15s\n", + " F = 10 # 0,5s samplerate -> 5s\n", + " Pulse_detected = 0\n", + " Pulse_detected_old = 0\n", + "\n", + " #low time parameters\n", + " Lowtime_counter = 0\n", + "\n", + " #moving average filter\n", + " MA_Filter = 0\n", + " Baseline = 0\n", + " Windowsize = 150\n", + " Highest_peak = 0\n", + "\n", + " #high pressure detection parameters\n", + " High_Pressure_level = 3500\n", + "\n", + " #loop for pulse detection\n", + " for i in range(1,np.size(data[:, col])):\n", + " Actual_Rise = data[i - 8, col] - data[i - 5, col]\n", + " Actual_Fall = data[i - 8, col] - data[i - 10, col]\n", + "\n", + " #measure highest peak\n", + " if ((data[i, col] - Baseline) > Highest_peak and i > (Windowsize + 10)):\n", + " Highest_peak = (data[i, col] - Baseline)\n", + "\n", + " #calculating moving average filter\n", + " MA_Filter = 0\n", + " if i > Windowsize:\n", + " for j in range(1, Windowsize):\n", + " MA_Filter = MA_Filter + data[i - j, col]\n", + " Baseline = int(MA_Filter / Windowsize)\n", + " data[i, 5] = Baseline\n", + "\n", + " #high pressure level detection\n", + " if ((data[i, col] - Baseline) > High_Pressure_level):\n", + " data[i, 8] = 1000\n", + "\n", + " Pulse_detected = 0\n", + " if Actual_Rise >= Slope_Rise and Actual_Fall >= Slope_Fall:\n", + " Pulse_detected = 1\n", + "\n", + " #flank detection\n", + " if Pulse_detected_old == 0 and Pulse_detected == 1:\n", + " Pulse_High_flank = 1\n", + " Pulse_detected_old = Pulse_detected\n", + " Pulse_detected = 0\n", + " data[i, 1] = Pulse_High_flank\n", + " data[i, 2] = State\n", + " data[i, 4] = LowTimeCounterHold\n", + " if State == 1: #wait for minimal low time\n", + " if Timer_D >= D:\n", + " State = 2\n", + " Lowtime_counter = 0\n", + " Highest_peak = 0\n", + "\n", + " if Pulse_High_flank == 1:\n", + " State = 1\n", + " Timer_D = 0\n", + " Pulse_High_flank = 0\n", + "\n", + " elif State == 2: #wait for first pulse\n", + " if Pulse_High_flank == 1:\n", + " State = 3\n", + " Timer_E = 0;\n", + " PulsLowTime = 0\n", + " Pulse_High_flank = 0\n", + "\n", + " elif State == 3: #wait for second pulse\n", + " if Timer_E == E:\n", + " State = 1\n", + " Timer_D = 0\n", + " if Pulse_High_flank == 1:\n", + " Timer_F = 0;\n", + " Pulse_High_flank = 0\n", + " if Timer_E >= Rumination_Time:\n", + " State = 5\n", + " Timer_F = 0;\n", + " else:\n", + " State = 4\n", + " Timer_F = 0;\n", + "\n", + " elif State == 4: #wait for third pulse or minimal low time\n", + " if Timer_F == F:\n", + " State = 1\n", + " Timer_D = 0\n", + " #normal wave detected\n", + " LowTimeCounterHold = Lowtime_counter/2\n", + " data[i, 3] = 1\n", + " data[i, 6] = Highest_peak\n", + " Rumination = 0\n", + " PeakValue = Highest_peak\n", + " if Pulse_High_flank == 1:\n", + " State = 5\n", + " Pulse_High_flank = 0\n", + "\n", + " elif State == 5: #wait for third pulse or minimal low time\n", + " if Timer_F == F:\n", + " State = 1\n", + " Timer_D = 0\n", + " #rumination wave detected\n", + " LowTimeCounterHold = Lowtime_counter/2\n", + " data[i, 3] = 2\n", + " data[i, 6] = Highest_peak\n", + " Rumination = 1\n", + " PeakValue = Highest_peak\n", + " if Pulse_High_flank == 1:\n", + " if Timer_E <= E: # pulse detected within window E\n", + " State == 5\n", + " Pulse_High_flank = 0\n", + " else:\n", + " State = 1\n", + " Timer_D = 0\n", + " Pulse_High_flank = 0\n", + " # Error, no wave detected\n", + "\n", + " #loop timers 0,5s per loop\n", + " Timer_D += 1\n", + " Timer_F += 1\n", + " Timer_E += 1\n", + " Lowtime_counter += 1\n", + "\n", + " #database output\n", + " data[i, 9] = Rumination\n", + " data[i, 7] = PeakValue\n", + " return data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9balS6NO90OM" + }, + "outputs": [], + "source": [ + "def process_file(file, base_directory, input_directory, output_directory, sep = \",\", store = True, disp = False, gain = 35, col=0):\n", + " infile = base_directory + input_directory + file\n", + " outfile = base_directory + output_directory + 'processed_' + file\n", + "\n", + " #read the file and make sure to change NaN with 0.0\n", + " #dateparse = lambda x: DateTime.strptime(x, '%Y-%m-%d %H:%M:%S')\n", + " raw_df = pd.read_csv (infile, sep=';', decimal=\",\")\n", + " #, parse_dates={'datetime': ['Date', 'Time']}, date_parser=dateparse)" + ] + }, + { + "cell_type": "code", + "source": [ + "#load all files in directory with *.csv\n", + "pd.set_option('display.max_rows', None)\n", + "\n", + "for f in os.listdir (bolus_path):\n", + " if f.endswith ('.csv'):\n", + " print(\"Started processing: \" + f)\n", + " process_file(f, base_dir, bolus_dir, output_dir, sep=\",\", store=True, disp=True, gain= 3, col=0)\n", + "\n", + " print(\"Done processing: \" + f)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oW47wU7-tlTP", + "outputId": "8b53adb8-24ae-4cb4-8bfe-53e8588923bf" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Started processing: bolus20210121.csv\n", + "Done processing: bolus20210121.csv\n", + "Started processing: bolus20210122.csv\n", + "Done processing: bolus20210122.csv\n", + "Started processing: bolus20210119.csv\n", + "Done processing: bolus20210119.csv\n", + "Started processing: bolus20210118.csv\n", + "Done processing: bolus20210118.csv\n", + "Started processing: bolus20210208.csv\n", + "Done processing: bolus20210208.csv\n", + "Started processing: bolus20210205.csv\n", + "Done processing: bolus20210205.csv\n", + "Started processing: bolus20210202.csv\n", + "Done processing: bolus20210202.csv\n", + "Started processing: bolus20210211.csv\n", + "Done processing: bolus20210211.csv\n", + "Started processing: bolus20210219.csv\n", + "Done processing: bolus20210219.csv\n", + "Started processing: bolus20210224.csv\n", + "Done processing: bolus20210224.csv\n", + "Started processing: bolus20210225.csv\n", + "Done processing: bolus20210225.csv\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h2sz6E1x3SsJ" + }, + "source": [ + "## File processing function" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oyZGBgUAhSHR" + }, + "source": [ + "The next algorythm allows\n", + "\n", + "* Fixed gain of 9\n", + "* Allow flexible seperator\n", + "* Allow display/store\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wX6Y6flW9T04" + }, + "outputs": [], + "source": [ + "def process_file(file, base_directory, input_directory, output_directory, sep = \",\", store = True, disp = False, gain = 9, col=0):\n", + " infile = base_directory + input_directory + file\n", + " outfile = base_directory + output_directory + 'processed_' + file\n", + "\n", + " #read the file and make sure to change NaN with 0.0\n", + " raw_df = pd.read_csv (infile, sep, decimal=\",\", dtype={'Date':str, 'Time':str, 'DateTime':str})\n", + " #raw_df['Date']=pd.to_datetime(raw_df['Date'],format=\"%Y-%m-%d\")\n", + " raw_df['MergeDateTime']=pd.to_datetime(raw_df['MergeDateTime'],format=\"%Y-%m-%d %H:%M:%S\")\n", + "\n", + " raw_col = raw_df[['Pressure']].fillna(0)\n", + " raw_merge = raw_df[['MergeDateTime']]\n", + " raw_index = raw_df[['DateTime']]\n", + " raw_pressure = raw_df[['Pressure']]\n", + "\n", + " #explicitely change format to int\n", + " raw_col.Pressure = raw_col.Pressure.astype(float)\n", + "\n", + " #add extra columns with all 0.0 values\n", + " extra_col_df = raw_col.assign(\n", + " pulse_flank = 0.0,\n", + " state = 0.0,\n", + " wave_type = 0.0,\n", + " low_time= 0.0,\n", + " baseline= 0.0,\n", + " highest_peak= 0.0,\n", + " moo= 0.0,\n", + " high_pressure= 0.0,\n", + " time_start= 0.0,\n", + " time_stop= 0.0,\n", + " rumination= 0.0,\n", + " peakvalue= 0.0)\n", + "\n", + " #create numpy array from pandas\n", + " input_array = extra_col_df.to_numpy()\n", + " index_array = raw_index.to_numpy()\n", + " merge_array = raw_merge.to_numpy()\n", + " raw_pressure_array = raw_pressure.to_numpy()\n", + "\n", + " #run picoalgorithm and display result\n", + " result = picoalgorithm_gain(input_array, gain, col)\n", + "\n", + " #create dataframe from result array\n", + " dataset = pd.DataFrame({\n", + " 'DateTime': index_array[:,0],\n", + " 'MergeDateTime': merge_array[:,0],\n", + " 'raw_pressure': raw_pressure_array[:, 0],\n", + " 'ret_pressure': result[:, 0],\n", + " 'pulse_flank': result[:, 1],\n", + " 'state': result[:, 2],\n", + " 'wave_type': result[:, 3],\n", + " 'low_time': result[:, 4],\n", + " 'baseline': result[:, 5],\n", + " 'highest_peak': result[:, 6],\n", + " 'peakvalue': result[:, 7],\n", + " 'high_pressure': result[:, 8],\n", + " 'rumination': result[:, 9]})\n", + "\n", + " #store to csv\n", + " if store == True:\n", + " dataset.to_csv(outfile)\n", + " if disp == True:\n", + " #display(dataset.head(1000))\n", + " dataset.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eeSB_xc5eQSJ" + }, + "source": [ + "## Read input files and creat algorithm output file" + ] + }, + { + "cell_type": "code", + "source": [ + "def process_file(file, base_directory, input_directory, output_directory, sep = \",\", store = True, disp = False, gain = 9, col=0):\n", + " infile = base_directory + input_directory + file\n", + " outfile = base_directory + output_directory + 'processed_' + file\n", + "\n", + " #read the file and make sure to change NaN with 0.0\n", + " raw_df = pd.read_csv (infile, sep, decimal=\",\", dtype={'Date':str, 'Time':str, 'DateTime':str})\n", + " #raw_df['Date']=pd.to_datetime(raw_df['Date'],format=\"%Y-%m-%d\")\n", + " raw_df['MergeDateTime']=pd.to_datetime(raw_df['MergeDateTime'],format=\"%Y-%m-%d %H:%M:%S\")\n", + "\n", + " #store to csv\n", + " if store == True:\n", + " raw_df.to_csv(outfile)\n", + " if disp == True:\n", + " #display(dataset.head(1000))\n", + " raw_df.describe()" + ], + "metadata": { + "id": "Ja8vSQwgwaR1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def process_file(file, base_directory, input_directory, output_directory, sep = \",\", store = True, disp = False, gain = 9, col=0):\n", + " infile = base_directory + input_directory + file\n", + " outfile = base_directory + output_directory + 'processed_' + file\n", + "\n", + " #read the file and make sure to change NaN with 0.0\n", + " raw_df = pd.read_csv (infile, sep, decimal=\",\", dtype={'Date':str, 'Time':str, 'DateTime':str})\n", + " #raw_df['Date']=pd.to_datetime(raw_df['Date'],format=\"%Y-%m-%d\")\n", + " raw_df['MergeDateTime']=pd.to_datetime(raw_df['MergeDateTime'],format=\"%Y-%m-%d %H:%M:%S\")\n", + "\n", + " raw_col = raw_df[['Pressure']].fillna(0)\n", + " raw_merge = raw_df[['MergeDateTime']]\n", + " raw_index = raw_df[['DateTime']]\n", + " raw_pressure = raw_df[['Pressure']]\n", + "\n", + " #explicitely change format to int\n", + " raw_col.Pressure = raw_col.Pressure.astype(float)\n", + "\n", + " #add extra columns with all 0.0 values\n", + " extra_col_df = raw_col.assign(\n", + " pulse_flank = 0.0,\n", + " state = 0.0,\n", + " wave_type = 0.0,\n", + " low_time= 0.0,\n", + " baseline= 0.0,\n", + " highest_peak= 0.0,\n", + " moo= 0.0,\n", + " high_pressure= 0.0,\n", + " time_start= 0.0,\n", + " time_stop= 0.0,\n", + " rumination= 0.0,\n", + " peakvalue= 0.0)\n", + "\n", + " #create numpy array from pandas\n", + " input_array = extra_col_df.to_numpy()\n", + " index_array = raw_index.to_numpy()\n", + " merge_array = raw_merge.to_numpy()\n", + " raw_pressure_array = raw_pressure.to_numpy()\n", + "\n", + " #run picoalgorithm and display result\n", + " result = picoalgorithm_gain(input_array, gain, col)\n", + "\n", + " #create dataframe from result array\n", + " dataset = pd.DataFrame({\n", + " 'DateTime': index_array[:,0],\n", + " 'MergeDateTime': merge_array[:,0],\n", + " 'raw_pressure': raw_pressure_array[:, 0],\n", + " 'ret_pressure': result[:, 0],\n", + " 'pulse_flank': result[:, 1],\n", + " 'state': result[:, 2],\n", + " 'wave_type': result[:, 3],\n", + " 'low_time': result[:, 4],\n", + " 'baseline': result[:, 5],\n", + " 'highest_peak': result[:, 6],\n", + " 'peakvalue': result[:, 7],\n", + " 'high_pressure': result[:, 8],\n", + " 'rumination': result[:, 9]})\n", + "\n", + " #store to csv\n", + " if store == True:\n", + " dataset.to_csv(outfile)\n", + " if disp == True:\n", + " #display(dataset.head(1000))\n", + " dataset.describe()" + ], + "metadata": { + "id": "JY2QJqvMv-Wi" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_0qMiSkfXTgL", + "outputId": "c021c37f-47a7-4efe-9308-124bce585e37" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Started processing: bolus20210121.csv\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:7: FutureWarning: In a future version of pandas all arguments of read_csv except for the argument 'filepath_or_buffer' will be keyword-only\n", + " import sys\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Done processing: bolus20210121.csv\n", + "Started processing: bolus20210122.csv\n", + "Done processing: bolus20210122.csv\n", + "Started processing: bolus20210119.csv\n", + "Done processing: bolus20210119.csv\n", + "Started processing: bolus20210118.csv\n", + "Done processing: bolus20210118.csv\n", + "Started processing: bolus20210208.csv\n", + "Done processing: bolus20210208.csv\n", + "Started processing: bolus20210205.csv\n", + "Done processing: bolus20210205.csv\n", + "Started processing: bolus20210202.csv\n", + "Done processing: bolus20210202.csv\n", + "Started processing: bolus20210211.csv\n", + "Done processing: bolus20210211.csv\n", + "Started processing: bolus20210219.csv\n", + "Done processing: bolus20210219.csv\n", + "Started processing: bolus20210224.csv\n", + "Done processing: bolus20210224.csv\n", + "Started processing: bolus20210225.csv\n", + "Done processing: bolus20210225.csv\n" + ] + } + ], + "source": [ + "#load all files in directory with *.csv\n", + "pd.set_option('display.max_rows', None)\n", + "\n", + "for f in os.listdir (bolus_path):\n", + " if f.endswith ('.csv'):\n", + " print(\"Started processing: \" + f)\n", + " process_file(f, base_dir, bolus_dir, output_dir, sep=\",\", store=True, disp=True, gain= 3, col=0)\n", + "\n", + " print(\"Done processing: \" + f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "paJjRP5m5dn3", + "outputId": "7f844701-79fb-4094-ed02-4636e0f5b3ae" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210121.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210122.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210119.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210118.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210208.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210205.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210202.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210211.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210219.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210224.csv\n", + "/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/processed_bolus20210225.csv\n" + ] + } + ], + "source": [ + "#create list of processed files\n", + "processed_dir = output_path\n", + "all_files = glob.glob(processed_dir + \"*.csv\")\n", + "\n", + "li = []\n", + "\n", + "for filename in all_files:\n", + " print(filename)\n", + " df = pd.read_csv(filename, index_col=0, header=0, sep = \",\", decimal=\".\")#, dtype={'MergeTime':str}, parse_dates=['MergeTime'])\n", + " #)\n", + " #df['MergeTime']=pd.to_datetime(df['MergeTime'], format=\"%H:%M:%S\")\n", + " #print(type(df['DateTime']))\n", + " df['file'] = filename\n", + " li.append(df)\n", + "\n", + "df_concat_processed = pd.concat(li, axis=0, ignore_index=True)\n", + "\n", + "#create column \"rumination_alg\"\n", + "#df_concat_processed['rumination_alg'] = df_concat_processed['rumination'].apply(lambda x: 1 if x == 1.0 else 0)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ckWnV4EcOlgf" + }, + "source": [ + "## Read input files and creat video output file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pPQj0pfg6JQm", + "outputId": "a0035ce8-49c6-4bbd-b515-6ed921e57b6e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210121a.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210121a.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210122.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210122.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210121b.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210121b.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210208.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210208.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210205.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210205.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210118.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210118.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210211.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210211.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210202.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210202.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210119.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210119.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210219.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210219.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210224.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210224.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210225.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210225.csv\n" + ] + } + ], + "source": [ + "path_video = video_path\n", + "all_files = glob.glob(path_video + \"*.csv\")\n", + "li = []\n", + "for filename in all_files:\n", + " print(\"Started processing: \" + filename)\n", + " df = pd.read_csv(filename,\n", + " header=0,\n", + " sep = \",\",\n", + " decimal=\".\",\n", + " nrows = 50000\n", + " )[['Cow','Date','Ruminating','Eating','Sleeping','Drinking','Urinate','Moo', 'CodeBehaviour', 'MergeDateTime', 'Lying']].dropna()\n", + " #df['Behaviour']=df['Ruminating']+df['Eating']+df['Drinking']+df['Sleeping']+df['Urinate']+df['Moo']\n", + " df['Resting'] = df['Lying'].apply(lambda x: 1 if (x == 21 or x== 20) else 0)\n", + " li.append(df)\n", + " print(\"Done processing: \" + filename)\n", + "#create a panda dataframe of processed video files\n", + "df_concat_video = pd.concat(li, axis=0, ignore_index=True)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jnxtE4qLO2e_" + }, + "source": [ + "## Merge processsed files and video files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WlxHuW7xJGHk" + }, + "outputs": [], + "source": [ + "df_inner_merged = pd.merge(df_concat_processed, df_concat_video, on='MergeDateTime', how='inner')\n", + "df_inner_merged['Ruminating'] = df_inner_merged['Ruminating'].replace([8],1.0)\n", + "df_inner_merged['Ruminating'] = df_inner_merged['Ruminating'].replace([0],0.0)" + ] + }, + { + "cell_type": "code", + "source": [ + "file_merged='/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Miel/mergedfile.csv'\n", + "df_inner_merged.to_csv(file_merged)" + ], + "metadata": { + "id": "Ua280AeR_Qhl" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df_concat='/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/out/df_concat_processed.csv'\n", + "df_concat_processed.to_csv(df_concat)" + ], + "metadata": { + "id": "Um0_ecghANMo" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "fJbgjym4Gkxo" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iCx9UP140ecx" + }, + "outputs": [], + "source": [ + "final_data_peak_detection=df_inner_merged[df_inner_merged.Date <20210225]\n", + "final_data_peak_detection_selected=df_inner_merged[df_inner_merged.Date ==20210224]" + ] + }, + { + "cell_type": "code", + "source": [ + "final_data_peak_detection.count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9jF7yV5INeiO", + "outputId": "b143be33-003b-49dc-ebcb-f0a645b611dd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DateTime 431163\n", + "MergeDateTime 431163\n", + "raw_pressure 431163\n", + "ret_pressure 431163\n", + "pulse_flank 431163\n", + "state 431163\n", + "wave_type 431163\n", + "low_time 431163\n", + "baseline 431163\n", + "highest_peak 431163\n", + "peakvalue 431163\n", + "high_pressure 431163\n", + "rumination 431163\n", + "file 431163\n", + "Cow 431163\n", + "Date 431163\n", + "Ruminating 431163\n", + "Eating 431163\n", + "Sleeping 431163\n", + "Drinking 431163\n", + "Urinate 431163\n", + "Moo 431163\n", + "CodeBehaviour 431163\n", + "Lying 431163\n", + "Resting 431163\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "np.unique(final_data_peak_detection['Cow'], return_counts=True)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "D_Im2-OSNYdr", + "outputId": "40650dd5-c1aa-470a-fd2a-62b06f8017ab" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([ 7, 8, 21, 25]), array([134790, 106815, 94803, 94755]))" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "np.unique(final_data_peak_detection['Sleeping'], return_counts=True)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tYddSx2gJZy-", + "outputId": "1fecf56f-fa6d-4dfa-c521-48f91e43fc94" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([0, 3]), array([427769, 3394]))" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "source": [ + "final_data_rum= final_data_peak_detection[final_data_peak_detection.Ruminating==1]\n", + "final_data_eat= final_data_peak_detection[final_data_peak_detection.Eating==4]\n", + "final_data_rest= final_data_peak_detection[final_data_peak_detection.Resting==1]\n", + "final_data_sleep= final_data_peak_detection[final_data_peak_detection.Sleeping==3]\n", + "\n", + "\n", + "print(\"Time between contraction cycles during rumination:\")\n", + "print(\"mean:\"+str(final_data_rum['low_time'].mean()))\n", + "print(\"std:\"+str(final_data_rum['low_time'].std()))\n", + "print(\"median:\"+str(final_data_rum['low_time'].median()))\n", + "print(\"iqr:\"+ str(iqr(final_data_rum['low_time']))+\"\\n\")\n", + "\n", + "print(\"Time between contraction cycles during eating:\")\n", + "print(\"mean:\"+str(final_data_eat['low_time'].mean()))\n", + "print(\"std:\"+str(final_data_eat['low_time'].std()))\n", + "print(\"median:\"+str(final_data_eat['low_time'].median()))\n", + "print(\"iqr:\"+ str(iqr(final_data_eat['low_time']))+\"\\n\")\n", + "\n", + "print(\"Time between contraction cycles during sleeping:\")\n", + "print(\"mean:\"+str(final_data_sleep['low_time'].mean()))\n", + "print(\"std:\"+str(final_data_sleep['low_time'].std()))\n", + "print(\"median:\"+str(final_data_sleep['low_time'].median()))\n", + "print(\"iqr:\"+ str(iqr(final_data_sleep['low_time']))+\"\\n\")\n", + "\n", + "print(\"Time between contraction cycles during resting:\")\n", + "print(\"mean:\"+str(final_data_rest['low_time'].mean()))\n", + "print(\"std:\"+str(final_data_rest['low_time'].std()))\n", + "print(\"median:\"+str(final_data_rest['low_time'].median()))\n", + "print(\"iqr:\"+ str(iqr(final_data_rest['low_time']))+\"\\n\")\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "m65zhu1QY3F-", + "outputId": "07d175b5-29c2-4a83-938b-d212f650bbf7" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Time between contraction cycles during rumination:\n", + "mean:50.19786760455993\n", + "std:12.688519054375197\n", + "median:50.5\n", + "iqr:14.0\n", + "\n", + "Time between contraction cycles during eating:\n", + "mean:37.92706353220209\n", + "std:18.693595015809986\n", + "median:35.0\n", + "iqr:16.0\n", + "\n", + "Time between contraction cycles during sleeping:\n", + "mean:50.19549204478491\n", + "std:27.07780997084258\n", + "median:44.0\n", + "iqr:13.0\n", + "\n", + "Time between contraction cycles during resting:\n", + "mean:42.375882325186375\n", + "std:21.52999407933072\n", + "median:38.5\n", + "iqr:17.0\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "VR_TZdz0BqdV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "sns.boxplot(x=\"behaviour\", y=\"low_time\",orient='v', data=final_data_peak_detection_corrected, showfliers=False)\n", + "plt.xlabel('Behaviour')\n", + "plt.ylabel('Time interval between contractions (s)')\n", + "plt.show()" + ], + "metadata": { + "id": "jmon--YHBna4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "98giER1N353M" + }, + "source": [ + "## Confusion matrix and classification report" + ] + }, + { + "cell_type": "code", + "source": [ + "results = confusion_matrix(final_data_peak_detection['Ruminating'], final_data_peak_detection['rumination'])\n", + "\n", + "print('Confusion matrix peak_detection_algorithm for rumination')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(final_data_peak_detection['Ruminating'], final_data_peak_detection['rumination']))\n", + "print('Report : ')\n", + "print(classification_report(final_data_peak_detection['Ruminating'], final_data_peak_detection['rumination']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lGTooBCDqinL", + "outputId": "d33c044c-b49b-49cd-f6b8-40581961c5e8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Confusion matrix peak_detection_algorithm for rumination\n", + "[[276845 73001]\n", + " [ 22023 59294]]\n", + "Accuracy Score : 0.779610031473016\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0.0 0.93 0.79 0.85 349846\n", + " 1.0 0.45 0.73 0.56 81317\n", + "\n", + " accuracy 0.78 431163\n", + " macro avg 0.69 0.76 0.70 431163\n", + "weighted avg 0.84 0.78 0.80 431163\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "results = confusion_matrix(final_data_peak_detection_selected['Ruminating'], final_data_peak_detection_selected['rumination'])\n", + "\n", + "print('Confusion matrix peak_detection_algorithm for rumination: file10')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(final_data_peak_detection_selected['Ruminating'], final_data_peak_detection_selected['rumination']))\n", + "print('Report : ')\n", + "print(classification_report(final_data_peak_detection_selected['Ruminating'], final_data_peak_detection_selected['rumination']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qN6zPOYP85oZ", + "outputId": "34b42537-fecc-41a3-d91a-dbfb6315e88b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Confusion matrix peak_detection_algorithm for rumination: file10\n", + "[[29889 878]\n", + " [ 672 11754]]\n", + "Accuracy Score : 0.9641145555992869\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0.0 0.98 0.97 0.97 30767\n", + " 1.0 0.93 0.95 0.94 12426\n", + "\n", + " accuracy 0.96 43193\n", + " macro avg 0.95 0.96 0.96 43193\n", + "weighted avg 0.96 0.96 0.96 43193\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y67CSm0le5xk" + }, + "source": [ + "# RandomForestAlgorithm" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jvus0EKfzBnU" + }, + "source": [ + "## Features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "slII8zfOzPU4" + }, + "outputs": [], + "source": [ + "def preprocess_feature_file(file, sep = \",\"):\n", + " #read the file and make sure to change NaN with 0.0\n", + " pressure_df = pd.read_csv(file, sep, decimal=\".\")[['DateTime','MergeDateTime', 'Pressure','Temperature']].dropna()\n", + " # normalisation\n", + " pressure_df[\"Pressure\"] /= np.max(np.abs(pressure_df[\"Pressure\"].values),axis=0)\n", + " pressure_df['MergeDateTime']=pd.to_datetime(pressure_df['MergeDateTime'],format=\"%Y-%m-%d %H:%M:%S\")\n", + " # define columns\n", + " #pressure_df[\"DateTime\"] = pressure_df[\"DateTime\"].astype(str).str[:-6].str.replace('T', ' ', regex=False).astype(str)\n", + " pressure_df[\"file_index\"] = pressure_df.index\n", + " pressure_df[\"feature_filename\"] = file\n", + " return pressure_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-8hzA73pyKku", + "outputId": "c0c3e3f8-0986-46b3-f38b-10cb1af9fdcb" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210121.csv\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:5: FutureWarning: In a future version of pandas all arguments of read_csv except for the argument 'filepath_or_buffer' will be keyword-only\n", + " \"\"\"\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210122.csv\n", + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210119.csv\n", + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210118.csv\n", + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210208.csv\n", + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210205.csv\n", + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210202.csv\n", + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210211.csv\n", + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210219.csv\n", + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210224.csv\n", + "Done processing\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/bolus/bolus20210225.csv\n", + "Done processing\n" + ] + } + ], + "source": [ + "all_feature_files = glob.glob(bolus_path + \"*.csv\")\n", + "list_feature_df = []\n", + "for filename in all_feature_files:\n", + " print(\"Started processing: \" + filename)\n", + " df = preprocess_feature_file(filename, sep=\",\")\n", + " list_feature_df.append(df)\n", + " print(\"Done processing\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "U2OA7lisyx06" + }, + "outputs": [], + "source": [ + "df_concat_features = pd.concat(list_feature_df, axis=0, ignore_index=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fZbtZrhhzHA9" + }, + "source": [ + "## Labels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vu4FHa6I_I2a" + }, + "outputs": [], + "source": [ + "WIN_SIZE = 240\n", + "WIN_OVERLAP = 1\n", + "BEHAVIOUR_1= 'rumination_video'\n", + "BEHAVIOUR_2= 'eating_video'\n", + "BEHAVIOUR_3= 'drinking_video'\n", + "BEHAVIOUR_4= 'sleeping_video'\n", + "BEHAVIOUR_5= 'urinating_video'\n", + "BEHAVIOUR_6= 'mooing_video'\n", + "BEHAVIOUR_7= 'resting_video'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8LxhFX5Vzd74" + }, + "outputs": [], + "source": [ + "def preprocess_label_file(file, window_size):\n", + " # read the file\n", + " df_raw = pd.read_csv(file, header=0, sep = \",\", decimal=\".\", nrows = 50000)[['Cow','Date','MergeDateTime','Ruminating','Eating','Sleeping','Drinking','Urinate','Moo', 'Lying','CodeBehaviour']].dropna()\n", + " # make a copy without ' ' in specific code\n", + " df=df_raw\n", + " # make extra columns\n", + " df['rumination_video'] = df['Ruminating'].apply(lambda x: 1 if x == 8 else 0)\n", + " df['eating_video'] = df['Eating'].apply(lambda x: 1 if x == 4 else 0)\n", + " df['sleeping_video'] = df['Sleeping'].apply(lambda x: 1 if x == 3 else 0)\n", + " df['urinating_video'] = df['Urinate'].apply(lambda x: 1 if x == 13 else 0)\n", + " df['drinking_video'] = df['Drinking'].apply(lambda x: 1 if x == 5 else 0)\n", + " df['mooing_video'] = df['Moo'].apply(lambda x: 1 if x == 17 else 0)\n", + " df['resting_video'] = df['Lying'].apply(lambda x: 1 if (x == 21 or x== 20) else 0)\n", + " df['MergeDateTime']=pd.to_datetime(df['MergeDateTime'],format=\"%Y-%m-%d %H:%M:%S\")\n", + "\n", + " #create one colum for behaviour\n", + " df_beh= df\n", + " df_beh[\"eating_video\"].replace({1:2}, inplace= True)\n", + " df_beh[\"drinking_video\"].replace({1:3}, inplace= True)\n", + " df_beh[\"sleeping_video\"].replace({1:4}, inplace= True)\n", + " df_beh[\"urinating_video\"].replace({1:5}, inplace= True)\n", + " df_beh[\"mooing_video\"].replace({1:6}, inplace= True)\n", + " df_beh[\"resting_video\"].replace({1:7}, inplace= True)\n", + "\n", + " df['behaviour']=df_beh['rumination_video']+df_beh['eating_video']+df_beh['resting_video']+df_beh['drinking_video']+df_beh['sleeping_video']+df_beh['mooing_video']+df_beh['urinating_video']\n", + " # most frequent behaviour in time frame\n", + " df['label_filename'] = file\n", + " df['behaviour'] = df['behaviour'].astype(str)\n", + " df['rolling_behaviour_backward'] = df.behaviour.rolling(window = window_size, min_periods=0).apply(lambda x: mode(x)[0])\n", + " df['rolling_rumination_backward'] = df['rolling_behaviour_backward'].apply(lambda x: 1 if x == 1 else 0)\n", + " df['rolling_eating_backward'] = df['rolling_behaviour_backward'].apply(lambda x: 1 if x == 2 else 0)\n", + " df['rolling_resting_backward'] = df['rolling_behaviour_backward'].apply(lambda x: 1 if x == 7 else 0)\n", + " df['rolling_sleeping_backward'] = df['rolling_behaviour_backward'].apply(lambda x: 1 if x == 4 else 0)\n", + " df['rolling_drinking_backward'] = df['rolling_behaviour_backward'].apply(lambda x: 1 if x == 3 else 0)\n", + " # behaviour existing in time frame\n", + " #df['was_ruminating_backward'] = df.code_1_5.rolling(window = window_size, min_periods=0).apply(lambda x:1 if 1.0 in x else 0)\n", + " #df['was_eating_backward'] = df.code_1_5.rolling(window = window_size, min_periods=0).apply(lambda x:1 if 4.0 in x else 0)\n", + " #df['was_resting_backward'] = df.code_1_5.rolling(window = window_size, min_periods=0).apply(lambda x:1 if 2.0 in x else 0)\n", + " #df['was_sleeping_backward'] = df.code_1_5.rolling(window = window_size, min_periods=0).apply(lambda x:1 if 3.0 in x else 0)\n", + " #df['was_drinking_backward'] = df.code_1_5.rolling(window = window_size, min_periods=0).apply(lambda x:1 if 5.0 in x else 0)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WRsaBVE8IXny", + "outputId": "39e48274-5064-4885-947f-f61091df8d45" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210121a.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210121a.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210122.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210122.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210121b.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210121b.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210208.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210208.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210205.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210205.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210118.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210118.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210211.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210211.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210202.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210202.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210119.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210119.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210219.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210219.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210224.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210224.csv\n", + "Started processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210225.csv\n", + "Done processing: /content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Josje/data/out/video/video20210225.csv\n" + ] + } + ], + "source": [ + "all_label_files = glob.glob(video_path + \"*.csv\")\n", + "list_label_df = []\n", + "for filename in all_label_files:\n", + " print(\"Started processing: \" + filename)\n", + " df = preprocess_label_file(filename, WIN_SIZE)\n", + " list_label_df.append(df)\n", + " print(\"Done processing: \" + filename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eDYYWCkyg6xS" + }, + "outputs": [], + "source": [ + "df_concat_labels = pd.concat(list_label_df, axis=0, ignore_index=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ab3Rf9kXTPJu" + }, + "source": [ + "## Merge" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q8d9jc58AjAk" + }, + "outputs": [], + "source": [ + "df_merged = pd.merge(df_concat_features, df_concat_labels, on='MergeDateTime', how='inner')\n", + "df_merged_corr = df_merged[df_merged.DateTime <='2021-02-24 22:30:00']" + ] + }, + { + "cell_type": "code", + "source": [ + "df_merged_corr.count()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ESeeiLd41qPu", + "outputId": "57cd4bdd-466e-4710-ac83-8f92012024ae" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DateTime 431163\n", + "MergeDateTime 431163\n", + "Pressure 431163\n", + "Temperature 431163\n", + "file_index 431163\n", + "feature_filename 431163\n", + "Cow 431163\n", + "Date 431163\n", + "Ruminating 431163\n", + "Eating 431163\n", + "Sleeping 431163\n", + "Drinking 431163\n", + "Urinate 431163\n", + "Moo 431163\n", + "Lying 431163\n", + "CodeBehaviour 431163\n", + "rumination_video 431163\n", + "eating_video 431163\n", + "sleeping_video 431163\n", + "urinating_video 431163\n", + "drinking_video 431163\n", + "mooing_video 431163\n", + "resting_video 431163\n", + "behaviour 431163\n", + "label_filename 431163\n", + "rolling_behaviour_backward 431163\n", + "rolling_rumination_backward 431163\n", + "rolling_eating_backward 431163\n", + "rolling_resting_backward 431163\n", + "rolling_sleeping_backward 431163\n", + "rolling_drinking_backward 431163\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df_merged_corr['eating_video'] = df_merged_corr['eating_video'].replace([2],1)\n", + "df_merged_corr['sleeping_video'] = df_merged_corr['sleeping_video'].replace([4],1)\n", + "df_merged_corr['drinking_video'] = df_merged_corr['drinking_video'].replace([3],1)\n", + "df_merged_corr['urinating_video'] = df_merged_corr['urinating_video'].replace([5],1)\n", + "df_merged_corr['mooing_video'] = df_merged_corr['mooing_video'].replace([6],1)\n", + "df_merged_corr['resting_video'] = df_merged_corr['resting_video'].replace([7],1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vofP9a7Xf6Nu", + "outputId": "2ac48937-7de5-4ced-84ce-52fbfa99358e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \"\"\"Entry point for launching an IPython kernel.\n", + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n", + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n", + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " after removing the cwd from sys.path.\n", + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \"\"\"\n", + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "file_merged='/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Miel/data/out/.csv'\n", + "df_merged.to_csv(file_merged)" + ], + "metadata": { + "id": "VKGVfyOS-4yq", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 328 + }, + "outputId": "1d63fcac-63ca-45bd-d33e-612798095f4c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "error", + "ename": "FileNotFoundError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mfile_merged\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Miel/data/out/.csv'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf_merged\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_merged\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mto_csv\u001b[0;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)\u001b[0m\n\u001b[1;32m 3480\u001b[0m \u001b[0mdoublequote\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdoublequote\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3481\u001b[0m \u001b[0mescapechar\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mescapechar\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3482\u001b[0;31m \u001b[0mstorage_options\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstorage_options\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3483\u001b[0m )\n\u001b[1;32m 3484\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/io/formats/format.py\u001b[0m in \u001b[0;36mto_csv\u001b[0;34m(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)\u001b[0m\n\u001b[1;32m 1103\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfmt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1104\u001b[0m )\n\u001b[0;32m-> 1105\u001b[0;31m \u001b[0mcsv_formatter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1106\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1107\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcreated_buffer\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/io/formats/csvs.py\u001b[0m in \u001b[0;36msave\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0mcompression\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompression\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0mstorage_options\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstorage_options\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m ) as handles:\n\u001b[1;32m 245\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 705\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 706\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 707\u001b[0;31m \u001b[0mnewline\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 708\u001b[0m )\n\u001b[1;32m 709\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/SensorBolus/Miel/data/out/.csv'" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QuvEv0Ntxog9" + }, + "source": [ + "## Sliding windows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E7mzzmfggehe" + }, + "outputs": [], + "source": [ + "def np_ffill(arr, axis):\n", + " idx_shape = tuple([slice(None)] + [np.newaxis] * (len(arr.shape) - axis - 1))\n", + " idx = np.where(~np.isnan(arr), np.arange(arr.shape[axis])[idx_shape], 0)\n", + " np.maximum.accumulate(idx, axis=axis, out=idx)\n", + " slc = [np.arange(k)[tuple([slice(None) if dim==i else np.newaxis\n", + " for dim in range(len(arr.shape))])]\n", + " for i, k in enumerate(arr.shape)]\n", + " slc[axis] = idx\n", + " return arr[tuple(slc)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mCYd4fmFTQAB" + }, + "outputs": [], + "source": [ + "def extract_sliding_windows(df, label_col_1, label_col_2, label_col_3, label_col_4, label_col_5, label_col_6, label_col_7, pressure_col, id_col, sub_window_size, chopped):\n", + " series = []\n", + " max_time = len(df)\n", + " pressure_index = df.columns.get_loc(pressure_col)\n", + " for i in range(max_time-sub_window_size+1):\n", + " if df[\"file_index\"][i] % chopped == 0.0:\n", + " serie = df[pressure_col][i:sub_window_size+i-1].values\n", + " forward_serie = np_ffill(serie, 0)\n", + " backfill_serie = np_ffill(forward_serie[::-1], 0)[::-1]\n", + " id_arr = df[id_col][i:sub_window_size+i-1].values\n", + " id = id_arr[::-1][0]\n", + " label_arr_1 = df[label_col_1][i:sub_window_size+i-1].values\n", + " label_1 = label_arr_1[::-1][0]\n", + " label_arr_2 = df[label_col_2][i:sub_window_size+i-1].values\n", + " label_2 = label_arr_2[::-1][0]\n", + " label_arr_3 = df[label_col_3][i:sub_window_size+i-1].values\n", + " label_3 = label_arr_3[::-1][0]\n", + " label_arr_4 = df[label_col_4][i:sub_window_size+i-1].values\n", + " label_4 = label_arr_4[::-1][0]\n", + " label_arr_5 = df[label_col_5][i:sub_window_size+i-1].values\n", + " label_5 = label_arr_5[::-1][0]\n", + " label_arr_6 = df[label_col_6][i:sub_window_size+i-1].values\n", + " label_6 = label_arr_6[::-1][0]\n", + " label_arr_7 = df[label_col_7][i:sub_window_size+i-1].values\n", + " label_7 = label_arr_7[::-1][0]\n", + " series.append((id, label_1, label_2, label_3, label_4, label_5, label_6, label_7, backfill_serie))\n", + " return np.vstack(series)" + ] + }, + { + "cell_type": "code", + "source": [ + "result_np = extract_sliding_windows(df_merged, BEHAVIOUR_1, BEHAVIOUR_2, BEHAVIOUR_3, BEHAVIOUR_4, BEHAVIOUR_5, BEHAVIOUR_6, BEHAVIOUR_7, \"Pressure\", \"Cow\",WIN_SIZE, WIN_OVERLAP)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1kKRE5vAVHGW", + "outputId": "30d597db-2ea0-4374-db72-bbb8ef141743" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/numpy/core/shape_base.py:121: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", + " ary = asanyarray(ary)\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nPd5OWZGdXFY", + "outputId": "8282fe6d-d3ec-463f-bd23-56bd20d862ba" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/numpy/core/shape_base.py:121: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", + " ary = asanyarray(ary)\n" + ] + } + ], + "source": [ + "result_np_1 = extract_sliding_windows(df_merged_corr, BEHAVIOUR_1, BEHAVIOUR_2, BEHAVIOUR_3, BEHAVIOUR_4, BEHAVIOUR_5, BEHAVIOUR_6, BEHAVIOUR_7, \"Pressure\", \"Cow\",WIN_SIZE, WIN_OVERLAP)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y4ipE_kPw4FX" + }, + "source": [ + "## Signal processing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XxoUSDntMJ6Q" + }, + "source": [ + "Adopted from http://nbviewer.ipython.org/github/demotu/BMC/blob/master/notebooks/DetectPeaks.ipynb\n", + "\n", + "Thank you Marcos Duarte" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WOyUZBM5cOf7" + }, + "outputs": [], + "source": [ + "def detect_peaks(x, mph=None, mpd=1, threshold=0, edge='rising',\n", + " kpsh=False, valley=False, show=False, ax=None):\n", + "\n", + " \"\"\"Detect peaks in data based on their amplitude and other features.\n", + "\n", + " Parameters\n", + " ----------\n", + " x : 1D array_like\n", + " data.\n", + " mph : {None, number}, optional (default = None)\n", + " detect peaks that are greater than minimum peak height.\n", + " mpd : positive integer, optional (default = 1)\n", + " detect peaks that are at least separated by minimum peak distance (in\n", + " number of data).\n", + " threshold : positive number, optional (default = 0)\n", + " detect peaks (valleys) that are greater (smaller) than `threshold`\n", + " in relation to their immediate neighbors.\n", + " edge : {None, 'rising', 'falling', 'both'}, optional (default = 'rising')\n", + " for a flat peak, keep only the rising edge ('rising'), only the\n", + " falling edge ('falling'), both edges ('both'), or don't detect a\n", + " flat peak (None).\n", + " kpsh : bool, optional (default = False)\n", + " keep peaks with same height even if they are closer than `mpd`.\n", + " valley : bool, optional (default = False)\n", + " if True (1), detect valleys (local minima) instead of peaks.\n", + " show : bool, optional (default = False)\n", + " if True (1), plot data in matplotlib figure.\n", + " ax : a matplotlib.axes.Axes instance, optional (default = None).\n", + "\n", + " Returns\n", + " -------\n", + " ind : 1D array_like\n", + " indeces of the peaks in `x`.\n", + "\n", + " Notes\n", + " -----\n", + " The detection of valleys instead of peaks is performed internally by simply\n", + " negating the data: `ind_valleys = detect_peaks(-x)`\n", + "\n", + " The function can handle NaN's\n", + "\n", + " See this IPython Notebook [1]_.\n", + "\n", + " References\n", + " ----------\n", + " .. [1] http://nbviewer.ipython.org/github/demotu/BMC/blob/master/notebooks/DetectPeaks.ipynb\n", + "\n", + " Examples\n", + " --------\n", + " >>> from detect_peaks import detect_peaks\n", + " >>> x = np.random.randn(100)\n", + " >>> x[60:81] = np.nan\n", + " >>> # detect all peaks and plot data\n", + " >>> ind = detect_peaks(x, show=True)\n", + " >>> print(ind)\n", + "\n", + " >>> x = np.sin(2*np.pi*5*np.linspace(0, 1, 200)) + np.random.randn(200)/5\n", + " >>> # set minimum peak height = 0 and minimum peak distance = 20\n", + " >>> detect_peaks(x, mph=0, mpd=20, show=True)\n", + "\n", + " >>> x = [0, 1, 0, 2, 0, 3, 0, 2, 0, 1, 0]\n", + " >>> # set minimum peak distance = 2\n", + " >>> detect_peaks(x, mpd=2, show=True)\n", + "\n", + " >>> x = np.sin(2*np.pi*5*np.linspace(0, 1, 200)) + np.random.randn(200)/5\n", + " >>> # detection of valleys instead of peaks\n", + " >>> detect_peaks(x, mph=0, mpd=20, valley=True, show=True)\n", + "\n", + " >>> x = [0, 1, 1, 0, 1, 1, 0]\n", + " >>> # detect both edges\n", + " >>> detect_peaks(x, edge='both', show=True)\n", + "\n", + " >>> x = [-2, 1, -2, 2, 1, 1, 3, 0]\n", + " >>> # set threshold = 2\n", + " >>> detect_peaks(x, threshold = 2, show=True)\n", + " \"\"\"\n", + "\n", + " x = np.atleast_1d(x).astype('float64')\n", + " if x.size < 3:\n", + " return np.array([], dtype=int)\n", + " if valley:\n", + " x = -x\n", + " # find indices of all peaks\n", + " dx = x[1:] - x[:-1]\n", + " # handle NaN's\n", + " indnan = np.where(np.isnan(x))[0]\n", + " if indnan.size:\n", + " x[indnan] = np.inf\n", + " dx[np.where(np.isnan(dx))[0]] = np.inf\n", + " ine, ire, ife = np.array([[], [], []], dtype=int)\n", + " if not edge:\n", + " ine = np.where((np.hstack((dx, 0)) < 0) & (np.hstack((0, dx)) > 0))[0]\n", + " else:\n", + " if edge.lower() in ['rising', 'both']:\n", + " ire = np.where((np.hstack((dx, 0)) <= 0) & (np.hstack((0, dx)) > 0))[0]\n", + " if edge.lower() in ['falling', 'both']:\n", + " ife = np.where((np.hstack((dx, 0)) < 0) & (np.hstack((0, dx)) >= 0))[0]\n", + " ind = np.unique(np.hstack((ine, ire, ife)))\n", + " # handle NaN's\n", + " if ind.size and indnan.size:\n", + " # NaN's and values close to NaN's cannot be peaks\n", + " ind = ind[np.in1d(ind, np.unique(np.hstack((indnan, indnan-1, indnan+1))), invert=True)]\n", + " # first and last values of x cannot be peaks\n", + " if ind.size and ind[0] == 0:\n", + " ind = ind[1:]\n", + " if ind.size and ind[-1] == x.size-1:\n", + " ind = ind[:-1]\n", + " # remove peaks < minimum peak height\n", + " if ind.size and mph is not None:\n", + " ind = ind[x[ind] >= mph]\n", + " # remove peaks - neighbors < threshold\n", + " if ind.size and threshold > 0:\n", + " dx = np.min(np.vstack([x[ind]-x[ind-1], x[ind]-x[ind+1]]), axis=0)\n", + " ind = np.delete(ind, np.where(dx < threshold)[0])\n", + " # detect small peaks closer than minimum peak distance\n", + " if ind.size and mpd > 1:\n", + " ind = ind[np.argsort(x[ind])][::-1] # sort ind by peak height\n", + " idel = np.zeros(ind.size, dtype=bool)\n", + " for i in range(ind.size):\n", + " if not idel[i]:\n", + " # keep peaks with the same height if kpsh is True\n", + " idel = idel | (ind >= ind[i] - mpd) & (ind <= ind[i] + mpd) \\\n", + " & (x[ind[i]] > x[ind] if kpsh else True)\n", + " idel[i] = 0 # Keep current peak\n", + " # remove the small peaks and sort back the indices by their occurrence\n", + " ind = np.sort(ind[~idel])\n", + "\n", + " if show:\n", + " if indnan.size:\n", + " x[indnan] = np.nan\n", + " if valley:\n", + " x = -x\n", + " _plot(x, mph, mpd, threshold, edge, valley, ax, ind)\n", + "\n", + " return ind" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gu1Oav0mWT3x" + }, + "outputs": [], + "source": [ + "def get_values(y_values, T, N, f_s):\n", + " y_values = y_values\n", + " x_values = [(1/f_s) * kk for kk in range(0,len(y_values))]\n", + " return x_values, y_values\n", + "\n", + "def get_fft_values(y_values, T, N, f_s):\n", + " f_values = np.linspace(0.0, 1.0/(2.0*T), N//2)\n", + " fft_values_ = fft(y_values)\n", + " fft_values = 2.0/N * np.abs(fft_values_[0:N//2])\n", + " return f_values, fft_values\n", + "\n", + "def get_psd_values(y_values, T, N, f_s):\n", + " f_values, psd_values = welch(y_values, fs=f_s)\n", + " return f_values, psd_values\n", + "\n", + "def autocorr(x):\n", + " result = np.correlate(x, x, mode='full')\n", + " return result[len(result)//2:]\n", + "\n", + "def get_autocorr_values(y_values, T, N, f_s):\n", + " autocorr_values = autocorr(y_values)\n", + " x_values = np.array([T * jj for jj in range(0, N)])\n", + " return x_values, autocorr_values\n", + "\n", + "def get_first_n_peaks(x,y,no_peaks=5):\n", + " x_, y_ = list(x), list(y)\n", + " if len(x_) >= no_peaks:\n", + " return x_[:no_peaks], y_[:no_peaks]\n", + " else:\n", + " missing_no_peaks = no_peaks-len(x_)\n", + " return x_ + [0]*missing_no_peaks, y_ + [0]*missing_no_peaks\n", + "\n", + "def get_features(x_values, y_values, mph):\n", + " indices_peaks = detect_peaks(y_values, mph=mph)\n", + " peaks_x, peaks_y = get_first_n_peaks(x_values[indices_peaks], y_values[indices_peaks])\n", + " return peaks_x + peaks_y\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4tUOn5cTs_xi" + }, + "outputs": [], + "source": [ + "def extract_features(dataset, T, N, f_s, denominator):\n", + " percentile = 5\n", + " list_of_features = []\n", + " for signal_no in range(0, len(dataset)):\n", + " if signal_no % 10000 == 0.0:\n", + " print(signal_no)\n", + " features = []\n", + " id = dataset[signal_no, 0]\n", + " label_1 = dataset[signal_no,1]\n", + " label_2 = dataset[signal_no,2]\n", + " label_3 = dataset[signal_no,3]\n", + " label_4 = dataset[signal_no,4]\n", + " label_5 = dataset[signal_no,5]\n", + " label_6 = dataset[signal_no,6]\n", + " label_7 = dataset[signal_no,7]\n", + " signal = dataset[signal_no,8]\n", + " signal_min = np.nanpercentile(signal, percentile)\n", + " signal_max = np.nanpercentile(signal, 100-percentile)\n", + " mph = 0\n", + " #create features\n", + " features += get_features(*get_psd_values(signal, T, N, f_s), mph)\n", + " features += get_features(*get_fft_values(signal, T, N, f_s), mph)\n", + " features += get_features(*get_autocorr_values(signal, T, N, f_s), mph)\n", + " list_of_features.append((label_1, label_2, label_3, label_4, label_5, label_6, label_7, id, features))\n", + " return np.vstack(list_of_features)" + ] + }, + { + "cell_type": "code", + "source": [ + " N = WIN_SIZE #samples\n", + " f_s = 2 #frequency\n", + " t_n = 0.5*N\n", + " T = t_n / N\n", + " sample_rate = 1 / f_s\n", + " denominator = 100" + ], + "metadata": { + "id": "HnTeu7H3Ks-a" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "signal_features_1= extract_features(result_np_1, T, N, f_s, denominator)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XuK3mUzAKw0_", + "outputId": "38daff06-7229-48c6-82e3-d1e961b028ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/scipy/signal/spectral.py:1966: UserWarning: nperseg = 256 is greater than input length = 239, using nperseg = 239\n", + " .format(nperseg, input_length))\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "10000\n", + "20000\n", + "30000\n", + "40000\n", + "50000\n", + "60000\n", + "70000\n", + "80000\n", + "90000\n", + "100000\n", + "110000\n", + "120000\n", + "130000\n", + "140000\n", + "150000\n", + "160000\n", + "170000\n", + "180000\n", + "190000\n", + "200000\n", + "210000\n", + "220000\n", + "230000\n", + "240000\n", + "250000\n", + "260000\n", + "270000\n", + "280000\n", + "290000\n", + "300000\n", + "310000\n", + "320000\n", + "330000\n", + "340000\n", + "350000\n", + "360000\n", + "370000\n", + "380000\n", + "390000\n", + "400000\n", + "410000\n", + "420000\n", + "430000\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/numpy/core/shape_base.py:121: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", + " ary = asanyarray(ary)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xdYju3T0U3zs" + }, + "source": [ + "# RandomForestAlgorithm Rumination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GKVyOQwnw_0I" + }, + "outputs": [], + "source": [ + "BEHAVIOUR= 'rumination_video'" + ] + }, + { + "cell_type": "code", + "source": [ + "X_final = np.stack(signal_features_1[:,8], axis =0)" + ], + "metadata": { + "id": "F-sx9EwN3woU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m-WUxDbIt15t" + }, + "outputs": [], + "source": [ + "y_final = np.stack(signal_features_1[:,0], axis =0) # BEHAVIOUR_1= 'rumination_video'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lCalRNXaFTEc" + }, + "outputs": [], + "source": [ + "# Split dataset into trainingsset (70%) and testset (30%)\n", + "X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.3, random_state=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AJxQpu8vWp8X" + }, + "source": [ + "## Test settings Random Forest Algorithm" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uyYDnrCqBXwe" + }, + "source": [ + "\n", + "### Test number of trees\n", + "Adopted from https://medium.com/all-things-ai/in-depth-parameter-tuning-for-random-forest-d67bb7e920d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8cnMf9BfBage", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9b92aea3-acb1-4a79-c2e5-ad0ec8c96def" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "2\n", + "4\n", + "8\n", + "16\n", + "32\n", + "64\n", + "100\n", + "200\n" + ] + } + ], + "source": [ + "n_estimators = [1, 2, 4, 8, 16, 32, 64, 100, 200]\n", + "train_results = []\n", + "test_results = []\n", + "for estimator in n_estimators:\n", + " print(estimator)\n", + " rf = RandomForestClassifier(n_estimators=estimator, max_features=\"sqrt\")\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(n_estimators, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(n_estimators, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"n_estimators\")\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 280 + }, + "id": "lMOw039ijsdD", + "outputId": "604dd3e2-26d1-4989-ac54-fe0efffb4e9f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wY-eagU2DICj" + }, + "source": [ + "### Test max depth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "etmoBtlTDJqq", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "fcada31a-3506-4531-a86e-8e1b2b6bde4c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.0\n", + "2.0\n", + "3.0\n", + "4.0\n", + "5.0\n", + "6.0\n", + "7.0\n", + "8.0\n", + "9.0\n", + "10.0\n", + "11.0\n", + "12.0\n", + "13.0\n", + "14.0\n", + "15.0\n", + "16.0\n", + "17.0\n", + "18.0\n", + "19.0\n", + "20.0\n", + "21.0\n", + "22.0\n", + "23.0\n", + "24.0\n", + "25.0\n", + "26.0\n", + "27.0\n", + "28.0\n", + "29.0\n", + "30.0\n" + ] + } + ], + "source": [ + "max_depths = np.linspace(1, 30, 30, endpoint=True)\n", + "train_results = []\n", + "test_results = []\n", + "for max_depth in max_depths:\n", + " print(max_depth)\n", + " rf = RandomForestClassifier(max_depth=max_depth, max_features=\"sqrt\", n_estimators = 20)\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VJbzCX1jDy6r", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "outputId": "f1f7a25f-7e99-4556-a202-0e4a67a07d52" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(max_depths, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(max_depths, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"Tree depth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5Fuu1C-sH3ox" + }, + "source": [ + "## Selected RF\n", + "From the previous we select settings for our selected RF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SsOXxGkMIHDK" + }, + "outputs": [], + "source": [ + "# Random Forest\n", + "rf = RandomForestClassifier(n_estimators=30, max_depth=30, max_features=\"sqrt\")\n", + "# Train Decision Tree Classifer\n", + "rf.fit(X_train,y_train)\n", + "\n", + "#Predict the response for test dataset\n", + "y_pred = rf.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fbJp0T8quEle" + }, + "source": [ + "## Confusion matrix and classification report" + ] + }, + { + "cell_type": "code", + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "3f4c83b5-d862-48d9-91a7-ec675174fd31", + "id": "NmXcEpqfhsDU" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: rumination_video\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[104822 77]\n", + " [ 927 23452]]\n", + "Accuracy Score : 0.9922337907455252\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 1.00 104899\n", + " 1 1.00 0.96 0.98 24379\n", + "\n", + " accuracy 0.99 129278\n", + " macro avg 0.99 0.98 0.99 129278\n", + "weighted avg 0.99 0.99 0.99 129278\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rgyuCsX-5K6r" + }, + "source": [ + "##Feature Importances" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o75UISZPJfOu", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b1e251b1-e676-44ea-d28b-d7846041ba8a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Feature: 0, Score: 0.02060\n", + "Feature: 1, Score: 0.03040\n", + "Feature: 2, Score: 0.03202\n", + "Feature: 3, Score: 0.03409\n", + "Feature: 4, Score: 0.04025\n", + "Feature: 5, Score: 0.08351\n", + "Feature: 6, Score: 0.07227\n", + "Feature: 7, Score: 0.06754\n", + "Feature: 8, Score: 0.06528\n", + "Feature: 9, Score: 0.06539\n", + "Feature: 10, Score: 0.02346\n", + "Feature: 11, Score: 0.02872\n", + "Feature: 12, Score: 0.02925\n", + "Feature: 13, Score: 0.02895\n", + "Feature: 14, Score: 0.03403\n", + "Feature: 15, Score: 0.08225\n", + "Feature: 16, Score: 0.06921\n", + "Feature: 17, Score: 0.06450\n", + "Feature: 18, Score: 0.06520\n", + "Feature: 19, Score: 0.06307\n", + "Feature: 20, Score: 0.00000\n", + "Feature: 21, Score: 0.00000\n", + "Feature: 22, Score: 0.00000\n", + "Feature: 23, Score: 0.00000\n", + "Feature: 24, Score: 0.00000\n", + "Feature: 25, Score: 0.00000\n", + "Feature: 26, Score: 0.00000\n", + "Feature: 27, Score: 0.00000\n", + "Feature: 28, Score: 0.00000\n", + "Feature: 29, Score: 0.00000\n" + ] + } + ], + "source": [ + "# feature importance\n", + "model = rf\n", + "importance = model.feature_importances_\n", + "# summarize feature importance\n", + "for i,v in enumerate(importance):\n", + "\tprint('Feature: %0d, Score: %.5f' % (i,v))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u98YWPj95v3o" + }, + "source": [ + "# Stratified cross validation rumination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T8ilE754VkTq", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ac1d63a7-d3b0-4c69-f906-68aed97100b5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Windows size is 120.0 seconds with overlap 0.5 seconds\n", + "[[69873 49]\n", + " [ 789 15474]]\n", + "Accuracy for the fold no. 1 on the test set: 0.9902767302894935\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 0.99 69922\n", + " 1 1.00 0.95 0.97 16263\n", + "\n", + " accuracy 0.99 86185\n", + " macro avg 0.99 0.98 0.98 86185\n", + "weighted avg 0.99 0.99 0.99 86185\n", + "\n", + "Windows size is 120.0 seconds with overlap 0.5 seconds\n", + "[[69859 63]\n", + " [ 770 15493]]\n", + "Accuracy for the fold no. 2 on the test set: 0.9903347450252364\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 0.99 69922\n", + " 1 1.00 0.95 0.97 16263\n", + "\n", + " accuracy 0.99 86185\n", + " macro avg 0.99 0.98 0.98 86185\n", + "weighted avg 0.99 0.99 0.99 86185\n", + "\n", + "Windows size is 120.0 seconds with overlap 0.5 seconds\n", + "[[69868 54]\n", + " [ 791 15472]]\n", + "Accuracy for the fold no. 3 on the test set: 0.9901955096594535\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 0.99 69922\n", + " 1 1.00 0.95 0.97 16263\n", + "\n", + " accuracy 0.99 86185\n", + " macro avg 0.99 0.98 0.98 86185\n", + "weighted avg 0.99 0.99 0.99 86185\n", + "\n", + "Windows size is 120.0 seconds with overlap 0.5 seconds\n", + "[[69880 41]\n", + " [ 848 15416]]\n", + "Accuracy for the fold no. 4 on the test set: 0.9896849799849161\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 0.99 69921\n", + " 1 1.00 0.95 0.97 16264\n", + "\n", + " accuracy 0.99 86185\n", + " macro avg 0.99 0.97 0.98 86185\n", + "weighted avg 0.99 0.99 0.99 86185\n", + "\n", + "Windows size is 120.0 seconds with overlap 0.5 seconds\n", + "[[69860 61]\n", + " [ 797 15466]]\n", + "Accuracy for the fold no. 5 on the test set: 0.9900445558340295\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 0.99 69921\n", + " 1 1.00 0.95 0.97 16263\n", + "\n", + " accuracy 0.99 86184\n", + " macro avg 0.99 0.98 0.98 86184\n", + "weighted avg 0.99 0.99 0.99 86184\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import StratifiedKFold\n", + "rf = RandomForestClassifier(n_estimators=25, max_depth=25, max_features=\"sqrt\")\n", + "skf = StratifiedKFold(n_splits=5, random_state=None, shuffle=True)\n", + "i=1\n", + "\n", + "for train_index, test_index in skf.split(X_final, y_final):\n", + " X_train, X_test = X_final[train_index], X_final[test_index]\n", + " y_train, y_test = y_final[train_index], y_final[test_index]\n", + " rf.fit(X_train,y_train)\n", + "# cross_validate also allows to specify metrics which you want to see\n", + " print (\"Windows size is \"+ str(WIN_SIZE/2)+\" seconds with overlap \"+ str(WIN_OVERLAP/2)+ \" seconds\")\n", + " print(confusion_matrix(y_test,rf.predict(X_test)))\n", + " print(f\"Accuracy for the fold no. {i} on the test set: {accuracy_score(y_test, rf.predict(X_test))}\")\n", + " print('Report : ')\n", + " print(classification_report(y_test, rf.predict(X_test)))\n", + " i += 1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nNJvh0c25_1p" + }, + "source": [ + "# Leave-out-one-animal rumination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8TrSfLzaWRI2" + }, + "outputs": [], + "source": [ + "# Choose cow to leave out 7; 8; 21; 84.0\n", + "COW_NUMBER= 21" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ylXCsfbD_QIW" + }, + "outputs": [], + "source": [ + "# Choose cow to leave out 7; 8; 21; 84.0\n", + "COW_NUMBER= 7" + ] + }, + { + "cell_type": "code", + "source": [ + "# Choose cow to leave out 7; 8; 21; 84.0\n", + "COW_NUMBER= 8" + ], + "metadata": { + "id": "3i-q64WnBEFG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Choose cow to leave out 7; 8; 21; 84.0\n", + "COW_NUMBER= 25" + ], + "metadata": { + "id": "TL3nm6AbBXKt" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tHYXhRqQMC6n" + }, + "outputs": [], + "source": [ + "def choose_cow_to_leave_out(COW_NUMBER):\n", + " result_train = np.stack(signal_features_1[signal_features_1[:,6] != COW_NUMBER], axis =0)\n", + " result_test = np.stack(signal_features_1[signal_features_1[:,6] == COW_NUMBER], axis =0)\n", + " return result_test, result_train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5qsidGLzPXRA" + }, + "outputs": [], + "source": [ + "result_test, result_train = choose_cow_to_leave_out(COW_NUMBER)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d_DKAeBgIomV" + }, + "outputs": [], + "source": [ + "#leave one cow out\n", + "y_train = np.stack(result_train[:,0], axis =0)\n", + "y_test = np.stack(result_test[:,0], axis =0)\n", + "X_train = np.stack(result_train[:,8], axis =0)\n", + "X_test = np.stack(result_test[:,8], axis =0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dV5cO0Pj7wnd" + }, + "source": [ + "##Selected RF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SxTSkv7ldN0G" + }, + "outputs": [], + "source": [ + "rf = RandomForestClassifier(n_estimators=30, max_depth=30, max_features=\"sqrt\")\n", + "rf.fit(X_train,y_train)\n", + "\n", + "#Predict the response for test dataset\n", + "y_pred = rf.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5oeCXkcK7dc1" + }, + "source": [ + "##Confusion matrix and classification report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d7ad2066-4852-4856-8831-8bcadb74bb35", + "id": "Vm1rn9LKBKQX" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Evaluation RF validation with leave out cow_number 25\n", + "\n", + "Behaviour: rumination_video\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[70204 8985]\n", + " [10862 4704]]\n", + "Accuracy Score : 0.7905440346155875\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.87 0.89 0.88 79189\n", + " 1 0.34 0.30 0.32 15566\n", + "\n", + " accuracy 0.79 94755\n", + " macro avg 0.60 0.59 0.60 94755\n", + "weighted avg 0.78 0.79 0.79 94755\n", + "\n" + ] + } + ], + "source": [ + "# Evaluation\n", + "print (\"Evaluation RF validation with leave out cow_number \"+ str(COW_NUMBER))\n", + "print(\"\")\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "26d8fdbc-d2cf-4e08-eb14-98c80844acf3", + "id": "bihkuNAPBJ0_" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Evaluation RF validation with leave out cow_number 25\n", + "\n", + "Behaviour: rumination_video\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[69576 9613]\n", + " [10574 4992]]\n", + "Accuracy Score : 0.7869558334652524\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.87 0.88 0.87 79189\n", + " 1 0.34 0.32 0.33 15566\n", + "\n", + " accuracy 0.79 94755\n", + " macro avg 0.60 0.60 0.60 94755\n", + "weighted avg 0.78 0.79 0.78 94755\n", + "\n" + ] + } + ], + "source": [ + "# Evaluation\n", + "print (\"Evaluation RF validation with leave out cow_number \"+ str(COW_NUMBER))\n", + "print(\"\")\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q_R19kwDdiqf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4c51b8f9-c887-46be-c555-846bd47c6ee4" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Evaluation RF validation with leave out cow_number 21\n", + "\n", + "Behaviour: rumination_video\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[78838 7236]\n", + " [ 7335 1156]]\n", + "Accuracy Score : 0.8459155078517422\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.91 0.92 0.92 86074\n", + " 1 0.14 0.14 0.14 8491\n", + "\n", + " accuracy 0.85 94565\n", + " macro avg 0.53 0.53 0.53 94565\n", + "weighted avg 0.85 0.85 0.85 94565\n", + "\n" + ] + } + ], + "source": [ + "# Evaluation\n", + "print (\"Evaluation RF validation with leave out cow_number \"+ str(COW_NUMBER))\n", + "print(\"\")\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "source": [ + "# Evaluation\n", + "print (\"Evaluation RF validation with leave out cow_number \"+ str(COW_NUMBER))\n", + "print(\"\")\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3l7VQNj7Aw_V", + "outputId": "d95c63e8-896a-492d-8adf-2f42054c65fe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Evaluation RF validation with leave out cow_number 7\n", + "\n", + "Behaviour: rumination_video\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[97183 2704]\n", + " [29441 5462]]\n", + "Accuracy Score : 0.7615179167594035\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.77 0.97 0.86 99887\n", + " 1 0.67 0.16 0.25 34903\n", + "\n", + " accuracy 0.76 134790\n", + " macro avg 0.72 0.56 0.56 134790\n", + "weighted avg 0.74 0.76 0.70 134790\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FNy2Wygkvbyl" + }, + "source": [ + "# RandomForestAlgorithm Eating" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tKdiPiZSw2pY" + }, + "outputs": [], + "source": [ + "BEHAVIOUR='eating'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iKa6UjZtPiYg" + }, + "outputs": [], + "source": [ + "y_final = np.stack(signal_features_1[:,1], axis =0) # BEHAVIOUR_2= 'eating_video'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aXSfq-llvbyw" + }, + "outputs": [], + "source": [ + "# Split dataset into trainingsset (70%) and testset (30%)\n", + "X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.3, random_state=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k3jKi-NVvbyx" + }, + "source": [ + "## Test settings RandomForestAlgorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MNeMy-6uvbyy", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "51c20332-435b-440e-a8d4-5336105386b0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "2\n", + "4\n", + "8\n", + "16\n", + "32\n", + "64\n", + "100\n", + "200\n" + ] + } + ], + "source": [ + "n_estimators = [1, 2, 4, 8, 16, 32, 64, 100, 200]\n", + "train_results = []\n", + "test_results = []\n", + "for estimator in n_estimators:\n", + " print(estimator)\n", + " rf = RandomForestClassifier(n_estimators=estimator, max_features=\"sqrt\")\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_VpPWzWIvby0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 280 + }, + "outputId": "e9578a9d-130f-4e4b-c7a8-995285218516" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEHCAYAAAC0pdErAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZQV1bn38e9PZm0GGUQiKmi4RhyCikMSuKKZHOJINDjkSpIbY65GvXl9E42JenWZaEKybhwSr3mDiElAxRBNJCoiqFfUCIoKUYQgKmiwBRkVmZ73j6oDh9Onuw9N1zlNn99nrVpdtavq1NPV3fX03lW1tyICMzOzQjtVOgAzM2uZnCDMzKwoJwgzMyvKCcLMzIpygjAzs6LaVjqA5tKzZ8/o169fpcMwM9uhzJw5872I6FVsXatJEP369WPGjBmVDsPMbIci6Y361rmJyczMinKCMDOzopwgzMysKCcIMzMrygnCzMyKyixBSBot6V1Js+tZL0k3SZov6SVJh+atO0/SvHQ6L6sYzcysflnWIMYAxzWw/nhgQDqdD/waQFJ34GrgSOAI4GpJu2YYp5mZFZHZexAR8YSkfg1scgowNpL+xp+R1E1SH2AYMDkilgFImkySaMZlFWtLsHEjrFvXtGnDhi2fU9h7+7YsV2rfYstN1dI+x6wceveG4cOb/3Mr+aLcHsBbecuL0rL6yuuQdD5J7YO99tormyibYMMG+Nvf4JFH4PHHYeXKxi/ymzZVOmoz21EdeWTrSxDbLSJuB24HGDx4cEX/53vjDXj44SQpTJkCy5fDTjvBoYdCnz7Qvn3jU4cOpW1XOLVpA9KWWPLnt3W5UvsWW26qlvY5Zllrm9GVvJIJYjGwZ95y37RsMUkzU375tLJFVaI1a5LawcMPJ9PcuUl5375JJv/CF+Bzn4Pu3Ssbp5lZU1UyQTwAXCRpPMkN6RUR8Y6kh4Ef592Y/gJwRaWCzPfRR3DLLTBpEvzv/yZNQ506wdFHw7e+BV/8Iuy/v//zNLPWIbMEIWkcSU2gp6RFJE8mtQOIiNuAScAJwHzgA+Br6bplkq4Dnks/6trcDetK+9nP4Ec/goMOgosvTmoJQ4dCx46VjszMrPkpWsnjGoMHD44se3PdtAk+/nHYZx949NHMDmNmVlaSZkbE4GLr/CZ1iR5/HF5/Hb7+9UpHYmZWHk4QJRo9Grp2hdNOq3QkZmbl4QRRguXLYcIEOOec5Ka0mVk1cIIowfjxsHatm5fMrLo4QZRg9Gg4+ODkpTczs2rhBNGIl1+G555Lag9+v8HMqokTRCNGj066szj33EpHYmZWXk4QDfjoI7jrLjjlFOjRo9LRmJmVlxNEA/78Z1i61Denzaw6OUE0YPTopPO9z3++0pGYmZWfE0Q9Fi1KemkdOTLpTtvMrNo4QdTjzjuT/pdGjqx0JGZmleEEUcSmTUnz0rBhsO++lY7GzKwynCCKeOIJWLDAN6fNrLo5QRQxdWryUlwWY7yame0onCCKWLECampg550rHYmZWeU4QRSxahV06VLpKMzMKssJoohVq6Bz50pHYWZWWU4QRaxc6QRhZuYEUYSbmMzMoG2lA2iJVq2C3XardBRmO4iI5OWhiLpTsfJt2bY1fUaWx9t9dzj77Gb/0TpBFOEmJitq3brkl2PVquRr/nxDZWvX7pgXnVLKrWU48kgniHJxE1MrsmFD3Qt2sYt5KRf7jz4q7Zg1NckvUOfOydeOHWGnnZJJ2jIVLtdXtq3l5f4Mx1z5mDPqMM4JokCEn2KquE2bYPXqpl/M8+c//LC0Y+6885YLeu7ivvfedcvyvxYrq6lJ/mDNWgEniAIffQTr1ztBNFkErFkDy5dvmVasKD6fv7xixZaL+urVpR2rQ4e6F+w+fWC//Uq/sOcu6m39p2BWKNO/CknHAb8E2gD/LyJuKFi/NzAa6AUsA86NiEXpup8CJ5I8aTUZuCQi+0bPVauSr1XbxLRxY3KRru9C3tBFPje/cWPDx+jUCbp2hW7dkmnXXaFfv8b/Oy8sa9++LKfErFplliAktQFuBT4PLAKek/RARPw9b7NRwNiIuFPSscBPgK9K+jTwGeDgdLv/BY4GpmUVb04uQeywNYh167b+r3xbL/IrVzZ+jM6dkwt77iL/sY/B/vtvueDnrytc7to1+c/fzFq8LGsQRwDzI2IBgKTxwClAfoIYCHw3nZ8K/CmdD6Aj0B4Q0A5YkmGsm+WujztEDeKFF+Dmm+GZZ7Zc5D/4oOF9dtpp64t1t25Jn+YNXdTzl7t0cXOMWZXI8i99D+CtvOVFwJEF27wInE7SDHUa0FlSj4h4WtJU4B2SBHFLRLxSeABJ5wPnA+y1117NEnSLr0Fs2AB/+hPcdBM8+STssksyJmrPnqVd5GtqkicfzMwaUel/BS8DbpE0EngCWAxslPRxYH+gb7rdZElDI+LJ/J0j4nbgdoDBgwc3y/2JXA2ixSWIZcvgN7+BW2+Ft96C/v3hF79IBq3o2rXS0ZlZK5RlglgM7Jm33Dct2ywi3iapQSCpBhgeEcslfRN4JiJWp+v+CnwK2CpBZKHF3aSePTtpRrrrruSRzWOPhVtugRNP9GDZZpapLB/Yfg4YIKm/pPbACOCB/A0k9ZSUi+EKkieaAN4EjpbUVlI7khvUdZqYstAimpg2boQHHoDPfQ4OOgjGjoVzz4WXX4YpU+Dkk50czCxzmdUgImKDpIuAh0kecx0dEXMkXQvMiIgHgGHATyQFSRPThenuE4BjgZdJblg/FBF/zirWfBVtYlqxIhkM+5ZbkjFP99wTbrgB/v3foUePCgRkZtUs03sQETEJmFRQdlXe/ASSZFC430bgW1nGVp9cDaKmpowHnTs3aUYaMyZ5yWzIELjxRjj1VD8xZGYV46tPgVWrkgeDMm/B2bQJHnkkeRrpr39NXvo66yy4+GI49NCMD25m1jgniAKZ9+S6ejXceWdSY5g7N+mm99pr4fzzoXfvDA9sZrZtnCAKZNaT64IFySOqv/1tcq/hiCPg97+HL3/ZXUaYWYvkBFGg2XtyfeKJ5H2FBx5I2q3OOAMuuSTpv93MrAVzgijQrE1MEyYkCaFnT7jySrjgAthjj2b6cDOzbDlBFFi1KhkGYLu99lrylvNRR8FjjyU9mJqZ7UA8skmBZmli+uCDpObQvj3cfbeTg5ntkFyDKNAsTUwXXZS89TxpEjRTJ4JmZuXmGkSB7X6KafRouOMO+OEP4bjjmi0uM7Nyc4LIs25dMuRok2sQL74IF14In/0sXH11s8ZmZlZuThB5tqsn1xUrkncauneHP/zBnemZ2Q7P9yDyNLkn1wj4xjfg9ddh2jTYbbfmDs3MrOycIPI0uSfXm26C++6Dn/406WjPzKwVcBNTniY1MT39NFx2GZxySvLVzKyVcILIs81NTO+9B2eemYzbMGaMx3o2s1al6puYImD58uSdtm1qYtq0KRnlrbYWpk+Hbt0yjdPMrNyqvgbxzjvJg0d33bWNTUzXXw8PPwy//KXHbzCzVqnqaxC77pp8ff/9LT1iNFqDePTR5D2Hc89NxnEwM2uFqj5BdOqUTMuWbUkMDSaIxYvh7LNh//3httt838HMWq2qTxCQ1CLefz+Z79SpgWGg16+Hr3wl6YxvwoRkbFIzs1bKCYLkHsSyZUliaLD2cO+98NRTyQ2L/fcvW3xmZpXgBMGWGkTHjo0kiDlzkiwyYkTZYjMzq5Sqf4oJttQgGu3Jdf586N+/gTYoM7PWwwmCpAaRSxAN1iDmzYOPf7xscZmZVVKmCULScZLmSpov6fIi6/eWNEXSS5KmSeqbt24vSY9IekXS3yX1yyrO7t2TJqYGBwuKSGoQThBmViUySxCS2gC3AscDA4GzJA0s2GwUMDYiDgauBX6St24s8LOI2B84Ang3q1i7d4c1a2Dp0gaamGprkyqGE4SZVYksaxBHAPMjYkFErAPGA6cUbDMQeCydn5pbnyaSthExGSAiVkfEB1kFmntZbvHiBmoQ8+cnX50gzKxKZJkg9gDeyltelJblexE4PZ0/DegsqQfwL8BySX+U9IKkn6U1kkx075583bixgRpELkEMGJBVGGZmLUqlb1JfBhwt6QXgaGAxsJHk8duh6frDgX2AkYU7Szpf0gxJM2pra5scRK4GAQ3UIObNS0aJ23vvJh/HzGxHkmWCWAzsmbfcNy3bLCLejojTI+IQ4Mq0bDlJbWNW2jy1AfgTUKdHvIi4PSIGR8TgXr16NTnQXA0CGmli2nvvpNtXM7MqkGWCeA4YIKm/pPbACOCB/A0k9ZSUi+EKYHTevt0k5a76xwJ/zyrQ/ATRYBOT7z+YWRXJLEGk//lfBDwMvALcExFzJF0r6eR0s2HAXEmvAb2B69N9N5I0L02R9DIg4DdZxdpoE1OE34Ews6qT6SvBETEJmFRQdlXe/ARgQj37TgYOzjK+nK5dk05ZI+pJEMuWwYoVvkFtZlWl0jepW4Q2bZIkAfU0Mc2bl3x1DcLMqogTRCp3H6JoDcLvQJhZFSopQUgaIulr6XwvSf2zDav8cvch6k0QUtJRn5lZlWg0QUi6Gvg+yVNGAO2A32UZVCXkahBFm5jmz4e99oIOHcoak5lZJZVSgzgNOBlYA8m7C0BjozbvcBptYvINajOrMqUkiHUREUAASGqV42zuumvyDlzR9+D8iKuZVaFSHnO9R9L/kLy49k3g62T4TkKlfPWrSStSHcuWJZMThJlVmQYThCQBdwOfAFYC+wFX5XpZbU0+/elkquMf/0i+OkGYWZVpMEFEREiaFBEHAa0uKZTEvbiaWZUq5R7E85IOzzySlir3iOs++1Q6EjOzsirlHsSRwDmS3iB5kkkklYuydINRcfPmQd++0LFjpSMxMyurUhLEFzOPoiVzL65mVqUabWKKiDeAbsBJ6dQtLasOThBmVqVKeZP6EuD3wG7p9DtJ38k6sBZhxQqorfUNajOrSqU0MX0DODIi1gBIuhF4Grg5y8BaBD/iamZVrJSnmEQyTnTOxrSs9XM332ZWxUqpQdwBPCtpYrp8KvDb7EJqQXLvQPgRVzOrQo0miIj4haRpwJC06GsR8UKmUbUU8+fDxz4Gu7TK7qfMzBrUaIKQdBQwJyKeT5e7SDoyIp7NPLpKcy+uZlbFSrkH8Wtgdd7y6rSs9fMjrmZWxUq6SZ129w1ARGyitHsXO7ZVq+Cf/3SCMLOqVUqCWCDpYknt0ukSYEHWgVWcH3E1sypXSoK4APg0sBhYRNI30/lZBtUiuBdXM6typTzF9C4wogyxtCy5BLHvvpWNw8ysQkrpauOn6ZNL7SRNkVQr6dxyBFdR8+fD7rtDTU2lIzEzq4hSmpi+EBErgS8BC4GPA/+3lA+XdJykuZLmS7q8yPq906TzkqRpkvoWrO8iaZGkW0o5XrPyONRmVuVKSRC5ZqgTgXsjYkUpHyypDXArcDwwEDhL0sCCzUYBY9OxJa4FflKw/jrgiVKO1+z8iKuZVblSEsRfJL0KHAZMkdQLWFvCfkcA8yNiQUSsA8YDpxRsMxB4LJ2fmr9e0mFAb+CREo7VvNasgbff9g1qM6tqpYwHcTnJU0yDI2I98AF1L/TF7AG8lbe8KC3L9yJwejp/GtBZUg9JOwE/By5r6ACSzpc0Q9KM2traEkIq0YL0KV7XIMysipVSgyAilkXExnR+TUT8s5mOfxlwtKQXgKNJHqXdCPwHMCkiFjUS1+0RMTgiBvfq1auZQmLLE0xOEGZWxbJ8I3oxsGfect+0bLOIeJu0BiGpBhgeEcslfQoYKuk/gBqgvaTVaW0me37E1cws0wTxHDBAUn+SxDACODt/A0k9gWVp9x1XAKMBIuKcvG1GkjRvlSc5ACxfDm3bQteuZTukmVlLU28Tk6QvSvpykfIvS/p8Yx8cERuAi4CHgVeAeyJijqRrJZ2cbjYMmCvpNZIb0tc34XtofmvXQseOlY7CzKyiGqpBXEUyOFChacCfgcmNfXhETAImFZRdlTc/AZjQyGeMAcY0dqxm5QRhZtbgTeoOEVHn0aCIeA9o3SPofPghdOpU6SjMzCqqoQTRRVKdGoakdkDrvnq6BmFm1mCC+CPwG0mbawvpk0a3petaLycIM7MGE8QPgSXAG5JmSnoeeB2oTde1Xm5iMjOr/yZ1+hTS5ZL+i6SDPki6zviwLJFVkmsQZmb1JwhJpxcUBdBN0qyIWJVtWBW2di106VLpKMzMKqqhx1xPKlLWHThY0jci4rEi61uHDz+E3XardBRmZhXVUBPT14qVS9obuIdk6NHWae1a34Mws6pXUmd9+SLiDaBdBrG0HL4HYWa27QlC0n7ARxnE0nI4QZiZNXiT+s8kN6bzdQf6AF/NMqiK82OuZmYN3qQeVbAcwFJgXjpCXOvlGoSZWYM3qR8vVi5piKSzIuLC7MKqoE2bYN06Jwgzq3oljQch6RCSsRzOIHmbuvV2tbE2HW7bTUxmVuUaugfxL8BZ6fQecDegiDimTLFVRi5BuAZhZlWuoRrEq8CTwJciYj6ApP8sS1SV5ARhZgY0/Jjr6cA7wFRJv5H0WUDlCauCPky7mnITk5lVuXoTRET8KSJGAJ8ApgKXArtJ+rWkL5QrwLJzDcLMDCjhRbmIWBMRf4iIk4C+wAvA9zOPrFKcIMzMgG18kzoi3o+I2yPis1kFVHFuYjIzA5rQ1Uar5xqEmRngBFGXE4SZGeAEUZebmMzMACeIulyDMDMDnCDqcoIwMwMyThCSjpM0V9J8SZcXWb+3pCmSXpI0TVLftHyQpKclzUnXfSXLOLeSa2JygjCzKpdZgpDUBrgVOB4YCJwlaWDBZqOAsRFxMHAt8JO0/APg3yLiAOA44L8ldcsq1q24sz4zMyDbGsQRwPyIWJCOHzEeOKVgm4HAY+n81Nz6iHgtIual828D7wK9Mox1i1yC6NChLIczM2upskwQewBv5S0vSsvyvUjS5xPAaUBnST3yN5B0BNAe+EfhASSdL2mGpBm1tbXNE/XatdCuHbRp0zyfZ2a2g6r0TerLgKMlvQAcDSwGNuZWSuoD3AV8LSI2Fe6cvtU9OCIG9+rVTBUMDzdqZgaUOGBQEy0G9sxb7puWbZY2H50OIKkGGB4Ry9PlLsCDwJUR8UyGcW7Nw42amQHZ1iCeAwZI6i+pPTACeCB/A0k9JeViuAIYnZa3ByaS3MCekGGMdTlBmJkBGSaIiNgAXAQ8DLwC3BMRcyRdK+nkdLNhwFxJrwG9gevT8jOBfwVGSpqVToOyinUrbmIyMwOybWIiIiYBkwrKrsqbnwDUqSFExO+A32UZW71cgzAzAyp/k7rlcYIwMwOcIOpyE5OZGeAEUZdrEGZmgBNEXU4QZmaAE0RdbmIyMwOcIOpyDcLMDHCCqMsJwswMcIKo68MPnSDMzHCCqGvtWt+DMDPDCWJrGzYkk2sQZmZOEFvxeNRmZps5QeTzcKNmZps5QeRzDcLMbDMniPfeg09+Eu6+2wnCzCyPE0SbNvDSS/DOO8kjruAmJjMznCC2JIMPP3QNwswsjxNEhw4gOUGYmRVwgpCShPDhh25iMjPL4wQBSUL44APXIMzM8jhBQJIg3MRkZrYVJwjYkiDcxGRmtpkTBMDOO7sGYWZWwAkC3MRkZlaEEwS4icnMrIhME4Sk4yTNlTRf0uVF1u8taYqklyRNk9Q3b915kual03lZxlmnBtGhQ6aHMzPbEWSWICS1AW4FjgcGAmdJGliw2ShgbEQcDFwL/CTdtztwNXAkcARwtaRds4p1qwSRe3HOzKzKZVmDOAKYHxELImIdMB44pWCbgcBj6fzUvPVfBCZHxLKIeB+YDByXWaT5TUy+/2BmBmSbIPYA3spbXpSW5XsROD2dPw3oLKlHifs2n/wahO8/mJkBlb9JfRlwtKQXgKOBxcDGUneWdL6kGZJm1NbWNj2K/AThGoSZGQBtM/zsxcCeect907LNIuJt0hqEpBpgeEQsl7QYGFaw77TCA0TE7cDtAIMHD44mR5rrasNNTGYtyvr161m0aBFrcw+QWJN17NiRvn370q5du5L3yTJBPAcMkNSfJDGMAM7O30BST2BZRGwCrgBGp6seBn6cd2P6C+n6bHTqBOvWwZo1bmIya0EWLVpE586d6devH/LDI00WESxdupRFixbRv3//kvfLrIkpIjYAF5Fc7F8B7omIOZKulXRyutkwYK6k14DewPXpvsuA60iSzHPAtWlZNnbeOfm6fLlrEGYtyNq1a+nRo4eTw3aSRI8ePba5JpZlDYKImARMKii7Km9+AjChnn1Hs6VGka1creH992H33ctySDMrjZND82jKeaz0TeqWIT9BuInJzAxwgkjkksKyZW5iMrPNli5dyqBBgxg0aBC77747e+yxx+bldevWNbjvjBkzuPjii7f5mLNmzUISDz300OayhQsXcuCBB2613TXXXMOoUaM2L48aNYpPfOITDBo0iMMPP5yxY8du87ELZdrEtMPIJYh165wgzGyzHj16MGvWLCC5INfU1HDZZZdtXr9hwwbati1+GR08eDCDBw/e5mOOGzeOIUOGMG7cOI47rrT3g2+77TYmT57M3/72N7p06cLKlSuZOHHiNh+7kBMEbN2s5CYmsxbv0kshvW5vs0GD4L//u+nHHjlyJB07duSFF17gM5/5DCNGjOCSSy5h7dq1dOrUiTvuuIP99tuPadOmMWrUKP7yl79wzTXX8Oabb7JgwQLefPNNLr300qK1i4jg3nvvZfLkyQwdOpS1a9fSsYR/Wn/84x8zbdo0unTpAkCXLl0477zt78LOCQK2TgquQZhZIxYtWsT06dNp06YNK1eu5Mknn6Rt27Y8+uij/OAHP+C+++6rs8+rr77K1KlTWbVqFfvttx/f/va367yTMH36dPr378++++7LsGHDePDBBxk+fHiDsaxcuZJVq1axzz77NOv3CE4QCScIsx3K9tQAmsMZZ5xBmzZtAFixYgXnnXce8+bNQxLr168vus+JJ55Ihw4d6NChA7vtthtLliyhb9++W20zbtw4RowYAcCIESMYO3Ysw4cPr/cJpKyf8HKCADcxmdk22WWXXTbP/+hHP+KYY45h4sSJLFy4kGHDhhXdp0PeMAJt2rRhw4YNW63fuHEj9913H/fffz/XX3/95pfbVq1aRY8ePXj//fe32n7ZsmX079+fLl26UFNTw4IFC5q9FuGnmGDLi3LgGoSZbZMVK1awxx5JX6Jjxoxp8udMmTKFgw8+mLfeeouFCxfyxhtvMHz4cCZOnEhNTQ19+vThsceSzq+XLVvGQw89xJAhQwC44ooruPDCC1m5ciUAq1evbpanmJwgwE1MZtZk3/ve97jiiis45JBD6tQKtsW4ceM47bTTtiobPnw448aNA2Ds2LFcd911DBo0iGOPPZarr76afffdF4Bvf/vbHHPMMRx++OEceOCBDB06lJ122v7LuyKa3sddSzJ48OCYMWNG03Zetgx69Ejmb7kFLryw+QIzsyZ75ZVX2H///SsdRqtR7HxKmhkRRZ/HdQ0CfA/CzKwIJwjYulnJTUxmZoATRELakhicIMzMACeILXJNS25iMjMDnCC2yCUG1yDMzAAniC2cIMzMtuIEkeMmJjMrsD3dfQNMmzaN6dOnN7jNqaeeylFHHbVV2ciRI5kwYeux1GpqajbPv/baa5xwwgkMGDCAQw89lDPPPJMlS5Zsw3dWGne1kZN7m9o1CDNLNdbdd2OmTZtGTU0Nn/70p4uuX758OTNnztymrjLWrl3LiSeeyC9+8QtOOumkzcepra2ld+/eJcdWCieIHDcxmbVs29PHd32a0Pf3zJkz+e53v8vq1avp2bMnY8aMoU+fPtx0003cdttttG3bloEDB3LDDTdw22230aZNG373u99x8803M3To0K0+649//CMnnXQSvXv3Zvz48fzgBz9o9Ph/+MMf+NSnPrU5OQD19v+0vZwgctzEZGaNiAi+853vcP/999OrVy/uvvturrzySkaPHs0NN9zA66+/TocOHVi+fDndunXjggsuaLDWMW7cOK666ip69+7N8OHDS0oQs2fP5rDDDmvub60oJ4gc1yDMWrZK9/ENfPTRR8yePZvPf/7zQNIDa58+fQA4+OCDOeecczj11FM59dRTG/2sJUuWMG/ePIYMGYIk2rVrx+zZsznwwAOLduOdddfexfgmdY4ThJk1IiI44IADmDVrFrNmzeLll1/mkUceAeDBBx/kwgsv5Pnnn+fwww9vtOO+e+65h/fff5/+/fvTr18/Fi5cuLljvsLuvZctW0bPnj0BOOCAA5g5c2ZG3+HWnCByOnVK3qhu377SkZhZC9WhQwdqa2t5+umnAVi/fj1z5sxh06ZNvPXWWxxzzDHceOONrFixgtWrV9O5c2dWrVpV9LPGjRvHQw89xMKFC1m4cCEzZ85k/PjxQHJP4e677978pNSYMWM45phjADj77LOZPn06Dz744ObPeuKJJ5g9e3azf79OEDmdOiW1hwpU48xsx7DTTjsxYcIEvv/97/PJT36SQYMGMX36dDZu3Mi5557LQQcdxCGHHMLFF19Mt27dOOmkk5g4cSKDBg3iySef3Pw5ufEe8h9v7d+/P127duXZZ5/lS1/6EkOHDuWwww5j0KBBPPXUU9x4440AdOrUib/85S/cfPPNDBgwgIEDB/KrX/2KXr16Nfv36+6+cx5/HB59FK67rvmCMrPt4u6+m1eL6u5b0nGS5kqaL+nyIuv3kjRV0guSXpJ0QlreTtKdkl6W9IqkK7KME4Cjj3ZyMDPLk1mCkNQGuBU4HhgInCVpYMFmPwTuiYhDgBHAr9LyM4AOEXEQcBjwLUn9sorVzMzqyrIGcQQwPyIWRMQ6YDxwSsE2AXRJ57sCb+eV7yKpLdAJWAeszDBWM2uhWkszeKU15TxmmSD2AN7KW16UluW7BjhX0iJgEvCdtHwCsAZ4B3gTGBURywoPIOl8STMkzaitrW3m8M2s0jp27MjSpUudJLZTRLB06VI6buNj/JV+Ue4sYExE/FzSp4C7JB1IUvvYCHwM2BV4UtKjEbEgf+eIuDQkph0AAAgdSURBVB24HZKb1OUN3cyy1rdvXxYtWoT/Adx+HTt2pG/fvtu0T5YJYjGwZ95y37Qs3zeA4wAi4mlJHYGewNnAQxGxHnhX0lPAYGABZlY12rVrR//+/SsdRtXKsonpOWCApP6S2pPchH6gYJs3gc8CSNof6AjUpuXHpuW7AEcBr2YYq5mZFcgsQUTEBuAi4GHgFZKnleZIulbSyelm/wf4pqQXgXHAyEgaG28FaiTNIUk0d0TES1nFamZmdflFOTOzKtbQi3KtJkFIqgXeaMKuPYH3mjmc5tBS44KWG5vj2jYtNS5oubG1xrj2joii/XS0mgTRVJJm1Jc9K6mlxgUtNzbHtW1aalzQcmOrtrjcWZ+ZmRXlBGFmZkU5QaQv2rVALTUuaLmxOa5t01LjgpYbW1XFVfX3IMzMrDjXIMzMrCgnCDMzK6qqE0RjAxqVMY4904GT/i5pjqRL0vJrJC2WNCudTqhAbAvTgZtmSZqRlnWXNFnSvPTrrmWOab+8czJL0kpJl1bqfEkaLeldSbPzyoqeIyVuSn/nXpJ0aJnj+pmkV9NjT5TULS3vJ+nDvHN3W5njqvdnJ+mK9HzNlfTFMsd1d15MCyXNSsvLeb7quz5k/zsWEVU5AW2AfwD7AO2BF4GBFYqlD3BoOt8ZeI1kkKVrgMsqfJ4WAj0Lyn4KXJ7OXw7cWOGf4z+BvSt1voB/BQ4FZjd2joATgL8CIulj7Nkyx/UFoG06f2NeXP3yt6vA+Sr6s0v/Dl4EOgD907/ZNuWKq2D9z4GrKnC+6rs+ZP47Vs01iFIGNCqLiHgnIp5P51eR9F1VOHZGS3IKcGc6fydwagVj+Szwj4hoylv0zSIingAKxyup7xydAoyNxDNAN0l9yhVXRDwSST9pAM+Q9LJcVvWcr/qcAoyPiI8i4nVgPsnfblnjkiTgTJI+48qqgetD5r9j1ZwgShnQqOyUDK16CPBsWnRRWk0cXe6mnFQAj0iaKen8tKx3RLyTzv8T6F2BuHJGsPUfbaXPV05956gl/d59neQ/zZz+SsaHf1zS0ArEU+xn11LO11BgSUTMyysr+/kquD5k/jtWzQmixZFUA9wHXBoRK4FfA/sCg0hG1/t5BcIaEhGHkowtfqGkf81fGUmdtiLPSivpRv5k4N60qCWcrzoqeY7qI+lKYAPw+7ToHWCvSMaH/y7wB0ld6ts/Ay3yZ5fnLLb+R6Ts56vI9WGzrH7HqjlBlDKgUdlIakfyw/99RPwRICKWRMTGiNgE/IaMqtYNiYjF6dd3gYlpDEtyVdb067vljit1PPB8RCxJY6z4+cpT3zmq+O+dpJHAl4Bz0gsLaRPO0nR+Jklb/7+UK6YGfnYt4Xy1BU4H7s6Vlft8Fbs+UIbfsWpOEKUMaFQWafvmb4FXIuIXeeX57YanAbML9804rl0kdc7Nk9zgnE1yns5LNzsPuL+cceXZ6r+6Sp+vAvWdoweAf0ufNDkKWJHXTJA5SccB3wNOjogP8sp7SWqTzu8DDKCMIzg28LN7ABghqYOk/mlcfytXXKnPAa9GxKJcQTnPV33XB8rxO1aOu/AtdSK52/8aSfa/soJxDCGpHr4EzEqnE4C7gJfT8geAPmWOax+SJ0heBObkzhHQA5gCzAMeBbpX4JztAiwFuuaVVeR8kSSpd4D1JO2936jvHJE8WXJr+jv3MjC4zHHNJ2mfzv2e3ZZuOzz9Gc8CngdOKnNc9f7sgCvT8zUXOL6ccaXlY4ALCrYt5/mq7/qQ+e+Yu9owM7OiqrmJyczMGuAEYWZmRTlBmJlZUU4QZmZWlBOEmZkV5QRhZmZFOUGYbQdJgwq6pj5ZzdR1vJIuzHdujs8yawq/B2G2HdJuKwZHxEUZfPbC9LPf24Z92kTExuaOxaqTaxBWFdIBXl6R9Jt00JVHJHWqZ9t9JT2U9mD7pKRPpOVnSJot6UVJT6RdtFwLfCUdNOYrkkZKuiXdfoykX0t6RtICScPSnkpfkTQm73i/ljQjjeu/0rKLgY8BUyVNTcvOUjJ402xJN+btv1rSzyW9CHxK0g1KBpd5SdKobM6oVYWsXg/35KklTSQDvGwABqXL9wDn1rPtFGBAOn8k8Fg6/zKwRzrfLf06Erglb9/NyyRdNIwn6frgFGAlcBDJP2Yz82LJdZHQBpgGHJwuLyQdrIkkWbwJ9ALaAo8Bp6brAjgzne9B0iWF8uP05Kkpk2sQVk1ej4hZ6fxMkqSxlbRL5U8D9yoZXvJ/SEb0AngKGCPpmyQX81L8OSKCJLksiYiXI+mxdE7e8c+U9DzwAnAAyWhhhQ4HpkVEbSQD/vyeZAQ0gI0kPX0CrADWAr+VdDrwQZ1PMitR20oHYFZGH+XNbwSKNTHtBCyPiEGFKyLiAklHAicCMyUdtg3H3FRw/E1A27SH0suAwyPi/bTpqWMJn5tvbaT3HSJig6QjSEba+zJwEXDsNn6eGeB7EGZbiWQgltclnQGbB4D/ZDq/b0Q8GxFXAbUkfe6vIhknuKm6AGuAFZJ6k4xxkZP/2X8DjpbUM+1m+izg8cIPS2tAXSNiEvCfwCe3Izarcq5BmNV1DvBrST8E2pHcR3gR+JmkAST3FKakZW8Cl6fNUT/Z1gNFxIuSXgBeJemG+6m81bcDD0l6OyKOSR+fnZoe/8GIKDYOR2fgfkkd0+2+u60xmeX4MVczMyvKTUxmZlaUm5isakm6FfhMQfEvI+KOSsRj1tK4icnMzIpyE5OZmRXlBGFmZkU5QZiZWVFOEGZmVtT/B0/uJHTF2o2vAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(n_estimators, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(n_estimators, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"n_estimators\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g3I6meg_vby1" + }, + "source": [ + "### Test max depth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zVLnZTC-vby2", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "fb2d928f-0d07-41a4-b4be-1ac862ed1ea7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.0\n", + "2.0\n", + "3.0\n", + "4.0\n", + "5.0\n", + "6.0\n", + "7.0\n", + "8.0\n", + "9.0\n", + "10.0\n", + "11.0\n", + "12.0\n", + "13.0\n", + "14.0\n", + "15.0\n", + "16.0\n", + "17.0\n", + "18.0\n", + "19.0\n", + "20.0\n", + "21.0\n", + "22.0\n", + "23.0\n", + "24.0\n", + "25.0\n", + "26.0\n", + "27.0\n", + "28.0\n", + "29.0\n", + "30.0\n", + "31.0\n", + "32.0\n" + ] + } + ], + "source": [ + "max_depths = np.linspace(1, 32, 32, endpoint=True)\n", + "train_results = []\n", + "test_results = []\n", + "for max_depth in max_depths:\n", + " print(max_depth)\n", + " rf = RandomForestClassifier(max_depth=max_depth, max_features=\"sqrt\", n_estimators = 50)\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sN-ePEulvby2", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "outputId": "1c35c213-55a2-4447-8c19-974abbe48a7d" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(max_depths, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(max_depths, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"Tree depth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1M4I9DMPvby3" + }, + "source": [ + "## Selected RF\n", + "From the previous we select settings for our selected RF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZoqVh_F_vby3" + }, + "outputs": [], + "source": [ + "# Random Forest\n", + "rf = RandomForestClassifier(n_estimators=40, max_depth=25, max_features=\"sqrt\")\n", + "# Train Decision Tree Classifer\n", + "rf.fit(X_train,y_train)\n", + "\n", + "#Predict the response for test dataset\n", + "y_pred = rf.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-KCF7N0Ivby4" + }, + "source": [ + "## Confusion matrix and classification report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VwXVge1nvby4", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "1ba5c388-91c7-4297-81bc-20051bf8f2d1" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: eating\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[94810 197]\n", + " [ 1872 32399]]\n", + "Accuracy Score : 0.9839957301319637\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.98 1.00 0.99 95007\n", + " 1 0.99 0.95 0.97 34271\n", + "\n", + " accuracy 0.98 129278\n", + " macro avg 0.99 0.97 0.98 129278\n", + "weighted avg 0.98 0.98 0.98 129278\n", + "\n" + ] + } + ], + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nzu-L5Ta5YNy" + }, + "source": [ + "##Feature importances" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QP2iuIoJvby5", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "444088dc-7d8e-4cbf-e616-d10553d90198" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Feature: 0, Score: 0.02046\n", + "Feature: 1, Score: 0.03013\n", + "Feature: 2, Score: 0.03189\n", + "Feature: 3, Score: 0.03386\n", + "Feature: 4, Score: 0.04063\n", + "Feature: 5, Score: 0.08446\n", + "Feature: 6, Score: 0.07268\n", + "Feature: 7, Score: 0.06740\n", + "Feature: 8, Score: 0.06580\n", + "Feature: 9, Score: 0.06534\n", + "Feature: 10, Score: 0.02449\n", + "Feature: 11, Score: 0.02755\n", + "Feature: 12, Score: 0.02978\n", + "Feature: 13, Score: 0.02947\n", + "Feature: 14, Score: 0.03364\n", + "Feature: 15, Score: 0.08171\n", + "Feature: 16, Score: 0.06800\n", + "Feature: 17, Score: 0.06569\n", + "Feature: 18, Score: 0.06516\n", + "Feature: 19, Score: 0.06185\n", + "Feature: 20, Score: 0.00000\n", + "Feature: 21, Score: 0.00000\n", + "Feature: 22, Score: 0.00000\n", + "Feature: 23, Score: 0.00000\n", + "Feature: 24, Score: 0.00000\n", + "Feature: 25, Score: 0.00000\n", + "Feature: 26, Score: 0.00000\n", + "Feature: 27, Score: 0.00000\n", + "Feature: 28, Score: 0.00000\n", + "Feature: 29, Score: 0.00000\n" + ] + } + ], + "source": [ + "# feature importance\n", + "model = rf\n", + "importance = model.feature_importances_\n", + "# summarize feature importance\n", + "for i,v in enumerate(importance):\n", + "\tprint('Feature: %0d, Score: %.5f' % (i,v))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qpozfboOB5Rj" + }, + "source": [ + "# RandomForestAlgorithm Resting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t7NJuxFoB5Rj" + }, + "outputs": [], + "source": [ + "BEHAVIOUR='resting'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "01BPbs5zB5Rj" + }, + "outputs": [], + "source": [ + "y_final = np.stack(signal_features_1[:,6], axis =0) # BEHAVIOUR_7= 'resting_video'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-x3N1cLKB5Rj" + }, + "outputs": [], + "source": [ + "# Split dataset into trainingsset (70%) and testset (30%)\n", + "X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.3, random_state=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o6qGxacCB5Rj" + }, + "source": [ + "## Test settings RandomForestAlgorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "89aaf4c2-9fc5-42a4-ab18-affc8d18e90b", + "id": "v4wVTyCQB5Rj" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "2\n", + "4\n", + "8\n", + "16\n", + "32\n", + "64\n", + "100\n", + "200\n" + ] + } + ], + "source": [ + "n_estimators = [1, 2, 4, 8, 16, 32, 64, 100, 200]\n", + "train_results = []\n", + "test_results = []\n", + "for estimator in n_estimators:\n", + " print(estimator)\n", + " rf = RandomForestClassifier(n_estimators=estimator, max_features=\"sqrt\")\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 280 + }, + "outputId": "d915b7c5-c04a-44a6-e195-1be39c208940", + "id": "IphlpOBCB5Rk" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(n_estimators, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(n_estimators, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"n_estimators\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Ey57_AoB5Rk" + }, + "source": [ + "### Test max depth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e2873673-4d80-4737-d2a3-81ab5a2fc395", + "id": "frtdWgtPB5Rk" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.0\n", + "2.0\n", + "3.0\n", + "4.0\n", + "5.0\n", + "6.0\n", + "7.0\n", + "8.0\n", + "9.0\n", + "10.0\n", + "11.0\n", + "12.0\n", + "13.0\n", + "14.0\n", + "15.0\n", + "16.0\n", + "17.0\n", + "18.0\n", + "19.0\n", + "20.0\n", + "21.0\n", + "22.0\n", + "23.0\n", + "24.0\n", + "25.0\n", + "26.0\n", + "27.0\n", + "28.0\n", + "29.0\n", + "30.0\n", + "31.0\n", + "32.0\n" + ] + } + ], + "source": [ + "max_depths = np.linspace(1, 32, 32, endpoint=True)\n", + "train_results = []\n", + "test_results = []\n", + "for max_depth in max_depths:\n", + " print(max_depth)\n", + " rf = RandomForestClassifier(max_depth=max_depth, max_features=\"sqrt\", n_estimators = 50)\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "outputId": "208ff3d0-f168-495d-9e41-55581c7aa067", + "id": "meSI9NYlB5Rk" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZxN9f/A8dfbGDPW7JJJJLukGkuhaBd+CDVKaBNZiyhtFKXyJbT4auWbxr4UStZUyi5ZIw1GWbKMdcz2+f3xucM1c2fMMHfOXd7Px+M87r3nnLnzPnM57/vZxRiDUkqp4JXH6QCUUko5SxOBUkoFOU0ESikV5DQRKKVUkNNEoJRSQS6v0wFkV8mSJU2FChWcDkMppfzK2rVr/zXGlPJ0zO8SQYUKFVizZo3TYSillF8Rkd0ZHdOqIaWUCnKaCJRSKshpIlBKqSDnd20EniQmJhIbG0t8fLzTofi98PBwIiIiCA0NdToUpVQuCYhEEBsbS+HChalQoQIi4nQ4fssYw+HDh4mNjaVixYpOh6OUyiVeqxoSkc9E5KCIbMrguIjIGBHZKSIbReSmS/1d8fHxlChRQpPAZRIRSpQooSUrpYKMN9sIvgDuy+R4M6Cya+sKfHQ5v0yTQM7Qv6NSwcdrVUPGmOUiUiGTU1oBE42dB/tXESkqImWNMf94KyallHNSUuDUKThzBpKS7JacfOFj2udZeZ2SAsakf7zYPk/Psyr1ZzLbLpen92jZEurWvfz3TsvJNoJywF6317GufekSgYh0xZYaKF++fK4Elx2HDx/mzjvvBGD//v2EhIRQqpQdwLdq1Sry5cuX4c+uWbOGiRMnMmbMmGz9zg0bNnDjjTfy7bffct99tuAVExNDixYt2LTpfG3c4MGDKVSoEP379wdgxIgRfPLJJ4SHhxMaGkqvXr3o1KlTtn63Cj7GwIkTcPQoHDly4Za67+hRe87Jk/YxdUt9feqU01fhf9IW0K+6KvASQZYZY8YD4wEiIyN9biWdEiVKsGHDBiD9jRcgKSmJvHk9/6kjIyOJjIzM9u+Mjo6mUaNGREdHn0sEFzNu3DgWLlzIqlWrKFKkCMePH2fWrFnZ/t0qMMTFwf79cPAgHDpkH1M399eHDtkbfXJyxu+VPz8ULQqFC9utUCEoV+7C16nP8+eHvHntFhJy4WPqc/fX7lva/SEhkCeP3UTOP7o/97Qvs+dZlXp+Rps/cTIR7AOudnsd4doXELp06UJ4eDjr16+nYcOGREVF0adPH+Lj48mfPz+ff/45VatWZdmyZYwYMYK5c+cyePBg9uzZw65du9izZw99+/ald+/e6d7bGMO0adNYuHAhjRs3Jj4+nvDw8IvG9Oabb7Js2TKKFCkCQJEiRejcuXOOX7vyHSkpsHcvbNtmt61bzz8/cMDzzxQrBqVKQenSUK0aNG4MJUpA8eL2WPHiF27Fitmbu/JfTiaCr4GeIjIZqA/E5XT7QN++4Pqinm116sB7713e74+NjWXFihWEhIRw/PhxfvzxR/LmzcuiRYsYNGgQM2bMSPcz27ZtY+nSpZw4cYKqVavSvXv3dH36V6xYQcWKFalUqRJNmjRh3rx5tG3bNtNYjh8/zokTJ7j22msv76KUTzt9Gj75BH75xd7st2+3dfKpihWD6tWheXOoUgUiIuwNP/XGX7IkZFKTqQKU1xKBiEQDTYCSIhILvAaEAhhjxgHzgfuBncBp4DFvxeKU9u3bExISAkBcXBydO3dmx44diAiJiYkef6Z58+aEhYURFhZG6dKlOXDgABERERecEx0dTVRUFABRUVFMnDiRtm3bZtjjR3sCBb6EBJsA3njDVvdUqGBv+E2b2m/11avbx5Il/a/aQnmfN3sNdbjIcQP08Nbvh8v/Rn+5ChYseO75K6+8QtOmTZk1axYxMTE0adLE48+EhYWdex4SEkJSUtIFx5OTk5kxYwZz5sxh2LBh5waBnThxghIlSnD06NELzj9y5AgVK1akSJEiFCpUiF27dmmpIIAkJ8OkSfDaaxATA40awdSptjpHqazyi8biQBAXF0e5cuUA+OKLLy75fRYvXkzt2rVZsGDBuX2dO3dm1qxZdOrUibJly7JkyRLuuOMOjhw5wnfffUefPn0AePHFF+nRowdTpkyhSJEinDx5kpkzZ2qvIT9kDMyaBa+8Alu2wI03wkcfwb33BtE3/sRE2xXJmPOtxqktzanP3f8YKSlw9qytK4uPT/+Y+jwrW3y8fa/Ux9Qt7Wv3/q0Zban9Vt37i6Y+T/v4/vvQtWuO/yk1EeSSAQMG0LlzZ4YOHUrz5s0v+X2io6Np06bNBfvatm3LRx99RKdOnZg4cSI9evTgueeeA+C1116jUqVKAHTv3p2TJ09St25dQkNDCQ0NpV+/fpd+USrXGQOLFsGgQbBmDVStCtOmwQMP2HufzzIGjh2DPXtsN6TTp+1N/PTp9M9TX3vaTp48/zyD6tULpHYJypMna+dn9j7581+4hYWd38LDbZco931hYbZrk3uXpLRb2u5K7okr7T4RuOGGS7+GzC7P5MTIh1wUGRlp0i5Ms3XrVqpXr+5QRIFH/56+5/hx+OEHGDkSli2D8uVhyBDo2NHeaxyXkGC7J+3da2/2abe9e+1N/GIKFDi/FSxot0KFzj9P+7pAAXszTUmx9WTJyZ6fp6TYG3P+/PamHR5+/rmnx7Rbvnx+X9QSkbXGGI991X3hn5BSKo0TJ+Cnn2DpUnvjX7vW3svKlIGxY+Gpp+x9LdcYYwcT7NqVfvvzT3ujTzs0t3Rpm7GqV7d1VuXLw9VX24tIvYmn3vALFLA3YT+/2forTQRK+YATJ+Dnn+1Nf+lSe+NPTobQUKhfH156CZo0gVtvtfdLrzHGfoPftOn8tnmzveHHxV14bpkycO21toX62muhYkW45hp7w4+I8HKgKidpIlDKQadPw3PP2a6fycm2mqd+fXjhBdv185Zb7Jdlrzh4EH7/Pf1N/8SJ8+dcfTXUqAENG9qbfepWsaKtolEBQROBUg757Tfo0MEO/OrWDdq0sd/43Xod55zjx20xY/VqWLXKPu7Zc/54iRJw/fXQubN9rFXLJoCiRb0QjPI1mgiUymXG2F6Azz9vR/p+/z3cdVcO/oKEBNi40d7wU2/6W7ee74J47bU24/TpY3uh1Kpl6/O1fj5oaSJQKhcdOgSPPw5z59ppHj7/3E7vkGNmz4bu3e3wYrBvXq8ePPSQfYyMtMOLlXKjiSAHXM401ADLli0jX7583HrrrRme07p1a/bv38+vv/56bl+XLl1o0aIF7dq1O7evUKFCnHR10/vjjz/o27cvO3bsoHDhwlx33XWMHTuWMmXKXPK1qku3eDE8+igcPgyjR0OvXjn4Jfzff+0bTp58fqKsBg1sw61+01cXoYkgB1xsGuqLWbZsGYUKFcowERw7doy1a9dma4qI+Ph4mjdvzsiRI2nZsuW533Po0CFNBLksMRFefRXeftsOAJs/396rc8z06fDMM3bA1uuv25bmNBMVKpUZXx6L6NfWrl3L7bffzs0338y9997LP//YiVXHjBlDjRo1qF27NlFRUcTExDBu3DhGjRpFnTp1+PHHH9O918yZM2nZsiVRUVFMnjw5S7//q6++4pZbbjmXBACaNGlCrVq1cuYCVZb8+aftXTl8ODz5pB0NnGNJ4OBBaN/ebuXL28bgV17RJKCyLfBKBJcz93RGsjkntTGGXr16MWfOHEqVKsWUKVN46aWX+Oyzzxg+fDh//fUXYWFhHDt2jKJFi9KtW7dMSxHR0dG8+uqrlClThrZt2zJo0KCLxrBp0yZuvvnmLMesct78+RAVZae8mTYN3GrwLo8xMGUK9Oxpu3q++aZtefaJIcbKH+m/HC84e/YsmzZt4u677wbsjKFly5YFoHbt2jzyyCO0bt2a1q1bX/S9Dhw4wI4dO2jUqBEiQmhoKJs2baJWrVoep5fWKad9Q3Q0dOpke2LOmmXHWeWI/fttY/Ds2bbx9/PPbTdPpS5D4CUCp+eexpYIatasyS+//JLu2Lx581i+fDnffPMNw4YN4/fff8/0vaZOncrRo0epWLEiYBeYiY6OZtiwYemmnT5y5AglXT1CatasyQ8//JCDV6Wyatw4W2XfuDF88w24FoS7PLt3w4wZMHSoHYX2zjvw7LNaClA5QtsIvCAsLIxDhw6dSwSJiYls3ryZlJQU9u7dS9OmTXn77beJi4vj5MmTFC5cmBPuozndREdH89133xETE0NMTAxr1649107QpEkTpkyZQkJCAmCnt27atCkADz/8MCtWrGDevHnn3mv58uUXLGyvct7w4fYL+/33w3ffXUYSMAbWr7cLDdSpY1ea6dcPata0VZ9aFaRykP5L8oI8efIwffp0evfuTVxcHElJSfTt25cqVarQsWNH4uLiMMbQu3dvihYtSsuWLWnXrh1z5sxh7NixNHatKhITE8Pu3btp0KDBufeuWLEiV1xxBStXrqRFixasXbuWm2++mZCQECpVqsS4ceMAyJ8/P3PnzqVv37707duX0NBQateuzejRox35mwQ6Y2xnnXfegYcfhi++uIQ228REO8XonDnw9dd25K+Ind7h3XehVSuoXNkb4asgp9NQq3T075k9ycm2FPDxx/bx/fezuTbA2rXwn//Y1uW4ODvt8T332Bt/ixY5POJMBSudhlopL0lIsIPEpk61i8UMHZqN8VvG2DatgQPtoiYPPACtW9v5Jrw205xS6WkiUOoSnT4NbdvatoB334VsjCG0w4u7dLFzTbRuDZ9+CsWLeytUpTIVMInAGKNdJ3OAv1UVOuXYMVtrs2KFrRJ68sls/PDPP9tpRw8cgDFj7HgA/berHBQQvYbCw8M5fPiw3sQukzGGw4cPE64LimQqLs6uFbBqlR3XleUkkJJiuxXdfrttSV6xIocnHFLq0gREiSAiIoLY2FgOHTrkdCh+Lzw8nIiICKfD8FnG2GUif//djhFo1iyLP3jwoG1M+P57ePBBGD8errjCq7EqlVUBkQhCQ0PPDbhSypvGj7fTRbz9djaSwLJltk/pkSPw3//aTKKlAOVDAqJqSKncsHGjXcvl3nuz2DCcnAxDhsCdd9qRZatWQdeumgSUzwmIEoFS3nbypF3bpXhxmDgxC+MEzp6FRx6x00I8+ih8+KGu8at8liYCpbKgZ0/Yvt0uLlO69EVOPnHCLkC8eDGMHGnnBFLKh2kiUOoiJk6ECRPstD+uqZwy9u+/tvFg/Xr7Q5065UqMSl0OTQRKZWL7djuT6O232zVfMrV3r50aIibGzj3ttiiQUr5ME4FSGThzxvb0zJ8fJk2yC8xkaNs2mwTi4mDBArjttlyLU6nLpYlAqQz062d7Cs2fD+XKZXLi6tW2OigkxM4emqMLEivlfdp9VCkPpk+Hjz6y0/5nOl5g8WK44w47adzPP2sSUH5JE4FSafz1FzzxBNSvD8OGZXLizJl2BZoKFWwSuO663ApRqRzl1UQgIveJyHYR2SkiL3g4fo2ILBaRjSKyTER0bgPlqIQEu+C8CEyenMniMp98Au3bQ2QkLF8OV12Vq3EqlZO8lghEJAT4AGgG1AA6iEjaVbZHABONMbWB14G3vBWPUlkxaJAdAPzZZ/aLvkeTJ9tpIu69184dVKxYboaoVI7zZomgHrDTGLPLGJMATAZapTmnBrDE9Xyph+NK5ZqZM+1CYc88Y9eI8WjDBnj8cWjUCGbPhoIFczVGpbzBm4mgHLDX7XWsa5+734DU/3JtgMIiUiLtG4lIVxFZIyJrdIZR5Q3bttl1YurVs4OBPfr3X7uITPHitjU5X77cDFEpr3G6sbg/cLuIrAduB/YByWlPMsaMN8ZEGmMiS+n6rSqHnTxpSwDh4fb+Hhbm4aSkJDuoYP9+O1isTJlcj1Mpb/HmOIJ9wNVuryNc+84xxvyNq0QgIoWAtsaYY16MSakLGGNrerZvh4UL4eqrMzjx+edh6VI7bUTdurkao1Le5s0SwWqgsohUFJF8QBTwtfsJIlJSRFJjeBH4zIvxKJXOqFF2fYG33rLDATyaONEuMt+nj84dpAKS1xKBMSYJ6AksALYCU40xm0XkdRH5P9dpTYDtIvIHUAbIrNe2Ujlq2TIYMMBWCz3/fAYnrV5t1xBo2tSuUK9UABJ/W+c3MjLSrFmzxukwlJ+LjYWbb7btvitX2nVj0jlwwJ6UNy+sWQMlS+Z6nErlFBFZa4yJ9HRM5xpSQSchwY4FO33algo8JoGEBGjXzi4vuWKFJgEV0DQRqKDz7LPw668wdSpUr57BSX37wk8/QXS0zh+kAp7T3UeVylUTJ9pVI/v3t6UCjz7+2M44N2CAnW9CqQCniUAFjQ0b4OmnoUkT20vIoxUroEcPu7bAm2/mZnhKOUYTgQoKR4/a3kElStipgvJ6qhT95x9o2xbKl7dVQpmuRKNU4NA2AhXwjIFHH7U9hZYvz2BQcGoL8vHjdiK54sVzPU6lnKKJQAW8MWNg3jz72KBBBif172/XFIiOhuuvz9X4lHKaVg2pgLZhg23zbdkSevbM4KQvv4SxY21PIW0cVkFIE4EKWKdOQYcOtl3gs8/sYjPp/PabHTl8223wzju5HqNSvkCrhlTAevbZ85PJeRwPltqCXKwYTJmSyXJkSgU2TQQqIM2YYYcDDBwId97p4YSUFNuCvHevHV585ZW5HaJSPkMTgQo4e/falSTr1oU33sjgpKFDbQvy++/DrbfmanxK+RptI1ABJTkZOnaExET46qsManu+/RYGD7Ylgmeeye0QlfI5WiJQAeXNN+1YgQkT4LrrPJywaxc8/DDUrg3jxmXQgqxUcNESgQoYK1bAkCH2Pv/oox5OOH3aNg6L2JXqCxTI9RiV8kVaIlABIS7OJoDy5e18cem+6BsD3brBxo22beDaax2JUylfpIlA+b3Ue3xsrJ052uP6Ah9+CP/7ny0yNGuW6zEq5cs0ESi/N2GCnUhu6NAMppBYvtyOGm7eHF5+OdfjU8rXaRuB8mt//GGnjmjSBF54wcMJu3fblcYqVYJJkyCP/pNXKi39X6H8VmIiPPII5Mtna33SzRp9+jS0bg1nz8KcOXDFFY7EqZSv06oh5beGDLFryk+fDhERaQ4aA48/bucSmjsXqlZ1JEal/IEmAuWXfvrJrjL22GN2LZl03n7bzh80fDjcf3+ux6eUP9GqIeV34uLsOIEKFWD0aA8nzJsHgwbZKaUHDMjt8JTyO1oiUH6nd2/Ys8eWCgoXTnNw2zY7oKBOHfj0Ux05rFQWaIlA+ZWpU2HiRNsL9JZb0hw8dgxatYKwMJg9W0cOK5VFWiJQfiM21g4cq1fPw3CA5GTbhWjXLliyxA4xVkpliSYC5RdSUqBzZ7vG/JdfephV9OWXYf58O5Fc48aOxKiUv8pS1ZCINBKRx1zPS4lIRe+GpdSFRo2yX/Tfew8qV05zMDra9g7q1g2eftqR+JTyZxdNBCLyGjAQeNG1KxT40ptBKeXut99sJ6DWreGJJ9IcXL/e7mzUKIMuREqpi8lKiaAN8H/AKQBjzN9A2r4aSnlFfLyt+i9e3C49eUEnoCNHoE0buzr99Ol2iLFSKtuy0kaQYIwxImIARKSgl2NS6pwXXoDNm+2iYhcsQJ/aaPD337YfaZkyjsWolL/LSolgqoj8FygqIk8Bi4CPvRuWUvD997a2p1cvuO++NAfffttOHTFqlO1GpJS6ZGKMyfigiAARQDXgHkCABcaYhbkTXnqRkZFmzZo1Tv16lUsOH4brr4dixex8Qvnzux1cuhTuugsefNAuTKyDxpS6KBFZa4yJ9HQs06ohV5XQfGPM9UC2b/4ich8wGggBPjHGDE9zvDwwASjqOucFY8z87P4eFXgGDoR//7U9Qi9IAn//baeOqFrVQ6OBUupSZKVqaJ2I1M3uG4tICPAB0AyoAXQQkRppTnsZmGqMuRGIAj7M7u9RgWf9evjsMzuVRJ06bgcSE+Ghh+DkSds4XKiQYzEqFUiy0lhcH3hERHZjew4JtrBQ+yI/Vw/YaYzZBSAik4FWwBa3cwyQurDgFcDf2YhdBSBj4NlnbUegdKOHX3rJNgxPmgQ10n6nUEpdqqwkgnsv8b3LAXvdXsdik4q7wcD3ItILKAjc5emNRKQr0BWgvE4dENBmz4YffrBLDBctmubAu+/CM8/YSeWUUjnmolVDxpjd2Dr8lq6tqGtfTugAfGGMiQDuB/4nIuliMsaMN8ZEGmMiS5UqlUO/Wvmas2fh+eehZk146im3A3/+abuK1q0LI0c6Fp9SgSorI4v7AJOA0q7tS9c3+IvZB1zt9jrCtc/dE8BUAGPML0A4UBIVlMaOtff8kSMhb2pZ9cwZu/JMSAhMm2ZnFlVK5aisVA09AdQ3xpwCEJG3gV+AsRf5udVAZde8RPuwjcFpy/R7gDuBL0SkOjYRHMp6+CpQHDoEb7xhFxO75x63A7162Tkm5s2Da65xLD6lAllWeg0JkOz2Otm1L1PGmCSgJ7AA2IrtHbRZRF4Xkf9zndYPeEpEfgOigS4ms4ENKmC9+iqcOgX/+Y/bzs8/t4vLvPSSLjeplBdlpUTwObBSRGa5XrcGPs3Km7vGBMxPs+9Vt+dbgIZZC1UFqk2bYPx46NEDqlVz7fz9d9swfMcddpV6pZTXZDqy+NxJIjcBjVwvfzTGrPdqVJnQkcWBxRi49147enjHDtttlKQkqF8f9u2DjRuhdGmnw1TK713yyGLXDzcANhtj1rleFxGR+saYlTkcpwpC8+fDwoV2nYESJVw733sP1q2zjcOaBJTyuouWCERkPXBTat29q3vnGmPMTbkQXzpaIggciYl2PiGwNUGhodilJmvVsi3Gs2bpFBJK5ZDLKhFgk8W5bGGMSRERXeJSXbaPPoLt2+Gbb1xJwBi7wlhoKHzwgSYBpXJJVnoN7RKR3iIS6tr6ALu8HZgKbEeOwODBcPfd0Ly5a+fEibBokV12slw5J8NTKqhkJRF0A27FjgVInSaiqzeDUoFvyBCIi7ODx0SAgwfhueegYUNdd1ipXHbRKh5jzEHsYDClcsS2bbbmp2tX2xwAQN++dlbRjz+GPFn5fqKUyilZmWLiHVdPoVARWSwih0SkY24EpwJT//5QsCC8/rprx7x5EB1tB45Vr+5obEoFo6x89brHGHMcaAHEANcBz3szKBW45s2z2yuvQKlSwIkT0L27nWnuhRecDk+poJSV3j+p5zQHphlj4kR7c6hLcOzY+eqgXqnTFr78MsTGws8/Q758jsanVLDKSiKYKyLbgDNAdxEpBcR7NywViPr1gwMHYM4c1ySiv/5qpxzt2RNuucXp8JQKWllZj+AFbK+hSGNMInAau9KYUln23Xd2+cmBAyEyEkhIgCefhIgIGDbM6fCUCmpZGhhmjDni9vwUdslKpbIkLs4uNFOjhp1lFIB33oHNm+1ossKFHY1PqWCnI4SV1/XvD3//DTNmuKqEtm2ziw9ERUGLFk6Hp1TQ0w7byqu+/x4++cQuQVmvHpCSYosHhQrB6NFOh6eUIpMSgYjcCxQ2xkxPs78dEGeMWejt4JR/O37cNgNUq2ankwDsAvQ//WQXndGZRZXyCZlVDb2KXYQmrWXAN4AmApWpAQPskgI//wzh4cCSJTBoEDz0kF2MXinlEzKrGgozxqRbP9gY8y9Q0HshqUCwaBH89792+qAGDbBjBaKioGpVW1ekY1GU8hmZJYIinqabFpFQIL/3QlL+7sQJWyVUpYprGomEBGjfHs6cgZkzbfuAUspnZJYIZgIfi8i5b/8iUggY5zqmlEcDB8KePbYZIH9+7EiyX3+1O84tSqyU8hWZJYKXgQPAbhFZKyLrgL+AQ65jSqWzZIldcObZZ+HWW4FJk+D9920yaNfO6fCUUh5kZanK/NiJ5gB2GmPOeD2qTOhSlb7r5Em79GRoKGzYAAX+/N0uQl+3LixeDHl12IpSTrmkpSpF5IE0uwxQVEQ2GGNO5GSAKjC8+CLs3g3Ll0OBxDh44AEoWhSmTNEkoJQPy+x/Z0sP+4oDtUXkCWPMEi/FpPzQnDm2Bqh3b2jU0MADXSAmBpYuhSuvdDo8pVQmMkwExpjHPO0XkWuAqdglK5Viyxbo2NHWAL39NnYeodmz4b33oFEjp8NTSl1EtqeYMMbsBkK9EIvyQ8eOQevWUKCA7Rka/vPi84PGevd2OjylVBZku+JWRKoCZ70Qi/Izycnw8MPw11+2BiiCWOjQwXYR1UFjSvmNzBqLv8E2ELsrDpQFHvVmUMo/vPoqfPut7S7aKDIemroGjc2YoYPGlPIjmZUIRqR5bYDDwA5jTIL3QlL+YNo0ePNNO5Ho00+lQMfH7KCx6dN10JhSfiazxuIfPO0XkUYi0sEY08N7YSlftnEjdOliV5ccOxbk5Zdg8mTbUty2rdPhKaWyKUttBCJyI/Aw0B47ulinmAhSR47YxuErrnAtNDNhPAwfDt262UUHlFJ+J7M2gipAB9f2LzAFOxK5aS7FpnxMUpKdQHTfPvjhByi74Vt45hlo3txVNNDGYaX8UWYlgm3Aj0ALY8xOABF5NleiUj7pxRdh4UL49FNokG+dnVH0hhtstZCOHFbKb2U2juAB4B9gqYh8LCJ3Atn6yici94nIdhHZKSIveDg+SkQ2uLY/RORY9sJXuWXSJBgxAnr0gMfv2mNLASVKwNy52kNIKT+XWWPxbGC2axrqVkBfoLSIfATMMsZ8n9kbi0gI8AFwNxALrBaRr40xW9x+x7Nu5/cCbryci1HesW6dXV/gtttg1GvHoOn9tpvookVQtqzT4SmlLtNFRxYbY04ZY74yxrQEIoD1wMAsvHc97Gylu1zdTSdjE0pGOgDRWXhflYsOH4Y2baBkSZg2KYHQqLbwxx92GHHNmk6Hp5TKAdmaYsIYc9QYM94Yc2cWTi8H7HV7Heval45r/qKKgMeJ7ESkq4isEZE1hw6lWz1TeUlyMjzyCOzfDzNnGEq/9JRdcOCTT+COO5wOTymVQ7I915CXRAHTjTHJng66kk+kMSayVKlSuRxa8Bo6FBYsgNGjoe78IeC1MMkAABAVSURBVDBxIgwZAp06OR2aUioHeTMR7AOudnsd4drnSRRaLeRTFiyw9/xHH4WnwyfYF126wCuvOB2aUiqHeTMRrAYqi0hFEcmHvdl/nfYkEakGFAN+8WIsKhv27LFVQjVrwvgOS5GnnoS77oLx43WsgFIByGuJwBiTBPQEFgBbganGmM0i8rqI/J/bqVHAZHOxNTNVrjh71g4PSEiAr9/dTvgjbaFKFTuHUKjOPq5UIPLqKCBjzHxgfpp9r6Z5PdibMajs6dcPVq2Cbz7/l4o9m9ub/7x5dk4JpVRA0uGg6pyvvoIPPoABfc7S4tM2EBsLy5ZBhQpOh6aU8iJNBAqAzZvtlNKNGhreOvQk/PSTnTqiQQOnQ1NKeZmvdB9VDjpxws4eXbgwzLtlKHm++tL2HX3oIadDU0rlAk0EQc4YeOIJ2LEDljwVTZERr9pxAoMGOR2aUiqXaCIIcmPG2NXGJjy9ghrvPmYnFNJuokoFFU0EQWzFCujfH7retYtHpreGq6+2cwiFhTkdmlIqF2kiCFIxMfDAA1Ar4hgf7m2BJCXZbqIlSjgdmlIql2kiCEJHj0KzZpAcn8iPZdsTsmsnzJplB44ppYKOJoIgc/asnVZ615+GjQ27U+iXRbZN4PbbnQ5NKeUQHUcQRFJS4PHH4YcfDFua9afs/E/tJHJdujgdmlLKQVoiCCKvvGJHD/9w5xtU/3Yk9OplZxVVSgU1TQRB4uOP4c03YUqDUdy2+DVbCnjvPe0mqpTSRBAMvvsOuneHUTU/4cFfn4N27WxmyKMfv1JKE0HA27DBTivdL2IKfbZ0hfvug0mTIK82DymlLE0EAWzvXmjeHNqGzWX4vo5I48YwYwbky+d0aEopH6JfCwNUXBzcfz/ceGwpnyW1Q+rUgW++gQIFnA5NKeVjtEQQgBITbTNAka0rmZ3SkjyVr7MNBUWKOB2aUsoHaYkgAPXtCwcWbWR1wWbkvfJKWLhQp45QSmVIE0GAmToVvv9wB+sK3ENY0QKwaBGULet0WEopH6aJIID8+Sf0e+IYP4W3oFCBZFi0TJeZVEpdlCaCAHH2LES1T+bT+Ecozy5k5hKoVs3psJRSfkATQYAYMADarH+Fe5gPH34IjRs7HZJSyk9oIggAs2bBP2OmMpq37Ar03bo5HZJSyo9oIvBzMTEwsvNvfJ/nMVLq30qesWN1/iClVLboOAI/lpAAXR/4ly9Ptia0VFHyzJiuy0wqpbJNSwR+7OUXkhi4/iEiQv8h5Ovl2k1UKXVJNBH4qblz4apR/bmTJfDxF1CvntMhKaX8lCYCP7R3L3wbNYEPGE1Sjz7k7dzZ6ZCUUn5ME4GfSUyEIc1X8f6ppzndoCkFRr3rdEhKKT+njcV+5p3n9jP49wdIKlWWAt9MhdBQp0NSSvk5LRH4CWPgw1FnafJ+W0qHHCHfwl+gZEmnw1JKBQAtEfiBAweg920baNivPg1ZAZ9/Djfc4HRYSqkAoSUCH/ftnAQ2PfwmI08PI6FISczEOeRr9X9Oh6WUCiBeLRGIyH0isl1EdorICxmc86CIbBGRzSLylTfj8SdnzsDwqA2UbV2P508P4VTzhyj412ZEk4BSKod5rUQgIiHAB8DdQCywWkS+NsZscTunMvAi0NAYc1RESnsrHn+ycU0Cy+9/i36HhnKmQAkSvphN0fatnA5LKRWgvFkiqAfsNMbsMsYkAJOBtHezp4APjDFHAYwxB70Yj89LSYEv+2/A1KtHz0ODOdT0IYrs3UI+TQJKKS/yZiIoB+x1ex3r2ueuClBFRH4WkV9F5D4vxuPT/t6dyJeVh/DQf+pSPt9+4ibM5qolX0Lx4k6HppQKcE43FucFKgNNgAhguYhcb4w55n6SiHQFugKUL18+t2P0ut++2YO0aU2n5PXsqPsw180fg5TUNYaVUrnDmyWCfcDVbq8jXPvcxQJfG2MSjTF/AX9gE8MFjDHjjTGRxpjIUqVKeS1gJ6yftpNirRpTwewiduwsKq+apElAKZWrvJkIVgOVRaSiiOQDooCv05wzG1saQERKYquKdnkxJp+y9n9buPKh2ygspzgzdwkRPVs7HZJSKgh5LREYY5KAnsACYCsw1RizWUReF5HUPpALgMMisgVYCjxvjDnsrZh8yer/ruOaTrcREgKJi5dTptlNToeklApSYoxxOoZsiYyMNGvWrHE6jMuyavQvVOnbjNOhV5Bv+WJKNrjO6ZCUUgFORNYaYyI9HdMpJnLZyreWUKPv3cTlK03Yyh81CSilHKeJIBetfHUeNwy6n3/CK1J4/XJK3Bh4PaCUUv5HE0Eu+fX56dz4Rhv+KlCLUpuWUbzGlU6HpJRSgCaCXLGyx0TqjniIrYXrcdXWxRStpN1DlVK+QxOBN6WksPrRMdT/sDPrit7BtX8s4IryVzgdlVJKXUATgTckJ7N9yGR2FbmBul/2YUWJFtTY+Q2FryzodGRKKZWOJoKclJTEn69OYG+RGlQd3IHE+GS+7fglN++ZTcES4U5Hp5RSHjk911BgSEhg9+sTCBv5FpXO/MXvITewruM07vrwAaoW1lyrlPJtmgguR3w8+17/lHzvvc01Z/ayLiSSHzu+x30ftOT6IuJ0dEoplSWaCC6B+Wc//wyfQPj40ZSL/4dfQhqyoOPHtBx7DzcV1QSglPIvmgiyyMSfZdeYuSSO/5zr/vyOq0hmWcgd/PnIJB4Y04RbimsCUEr5J00EmUhJNmz633rixnxBrd8mUSnlCLGUY1qFAUiXztzdsypNdEiAUsrPaSJIIzkZVs49xP4RX1Lt1y+onbSReMJYVbY1Zzo8RuTAu+hQOsTpMJVSKsdoInBz+pRhTs1BtNs9glCS+KNYPVa1+pCqg6O47ZpiToenlFJeoYnA5cxpw9waA+iwZwTb6j5K+Q8GUqVuTafDUkopr9NEgE0Cs2u+ZJPAXT2o9v1YEG38VUoFh6Af7RQfD9OvH0KHmLfY3qQr1RaM0SSglAoqQZ0I4uNhSu1hPLprCH80eoyqiz+CPEH9J1FKBaGgveudPQuTbnyXzjteZsctj1Jl2ceaBJRSQSko73xnz8L/bhrFE9sGsLNuFJV//BxCtEuoUio4BV0iSEiAL+p+wJNbnmPXjW25bsX/NAkopYJaUCWCxET4pP54nv69J3/VbsW1K6Mhr3acUkoFt6BJBImJML7BZzyz4Wliat5PxVVTIDTU6bCUUspxQZMIZrefRPd1T7K72j1UWDMDwsKcDkkppXxC0NSL3Pf0Nezb14prln8F4bpamFJKpQqaRFC4WSMKN2vkdBhKKeVzgqZqSCmllGeaCJRSKshpIlBKqSCniUAppYKcJgKllApymgiUUirIaSJQSqkgp4lAKaWCnBhjnI4hW0TkELA7ze6SwL8OhJPTAuE69Bp8g16Db/Cla7jGGFPK0wG/SwSeiMgaY0yk03FcrkC4Dr0G36DX4Bv85Rq0akgppYKcJgKllApygZIIxjsdQA4JhOvQa/ANeg2+wS+uISDaCJRSSl26QCkRKKWUukSaCJRSKsj5fSIQkftEZLuI7BSRF5yO51KISIyI/C4iG0RkjdPxZIWIfCYiB0Vkk9u+4iKyUER2uB6LORnjxWRwDYNFZJ/rs9ggIvc7GePFiMjVIrJURLaIyGYR6ePa7zefRSbX4G+fRbiIrBKR31zXMcS1v6KIrHTdo6aISD6nY03Lr9sIRCQE+AO4G4gFVgMdjDFbHA0sm0QkBog0xvjKwJOLEpHbgJPARGNMLde+d4AjxpjhrqRczBgz0Mk4M5PBNQwGThpjRjgZW1aJSFmgrDFmnYgUBtYCrYEu+Mlnkck1PIh/fRYCFDTGnBSRUOAnoA/wHDDTGDNZRMYBvxljPnIy1rT8vURQD9hpjNlljEkAJgOtHI4pKBhjlgNH0uxuBUxwPZ+A/c/sszK4Br9ijPnHGLPO9fwEsBUohx99Fplcg18x1knXy1DXZoA7gOmu/T75Wfh7IigH7HV7HYsf/gPC/mP5XkTWikhXp4O5DGWMMf+4nu8HyjgZzGXoKSIbXVVHPlulkpaIVABuBFbip59FmmsAP/ssRCRERDYAB4GFwJ/AMWNMkusUn7xH+XsiCBSNjDE3Ac2AHq4qC79mbJ2jP9Y7fgRUAuoA/wD/cTacrBGRQsAMoK8x5rj7MX/5LDxcg999FsaYZGNMHSACW2NRzeGQssTfE8E+4Gq31xGufX7FGLPP9XgQmIX9B+SPDrjqe1PrfQ86HE+2GWMOuP4zpwAf4wefhas+egYwyRgz07Xbrz4LT9fgj59FKmPMMWApcAtQVETyug755D3K3xPBaqCyq1U+HxAFfO1wTNkiIgVdDWSISEHgHmBT5j/ls74GOruedwbmOBjLJUm9ebq0wcc/C1cD5afAVmPMSLdDfvNZZHQNfvhZlBKRoq7n+bGdWLZiE0I712k++Vn4da8hAFeXsveAEOAzY8wwh0PKFhG5FlsKAMgLfOUP1yAi0UAT7DS7B4DXgNnAVKA8dqrwB40xPtsYm8E1NMFWRRggBnjara7d54hII+BH4HcgxbV7ELaO3S8+i0yuoQP+9VnUxjYGh2C/ZE81xrzu+j8+GSgOrAc6GmPOOhdpen6fCJRSSl0ef68aUkopdZk0ESilVJDTRKCUUkFOE4FSSgU5TQRKKRXkNBGogCciJdxmsNyfZkbLHJ8JUkSWicglLVguIq1FpEZOvJdSWZX34qco5d+MMYex/dE9zi4qInnd5oJxWmtgLuBXM+gq/6YlAhWUROQLERknIiuBd0Skkoh855r470cRqeY6r5SIzBCR1a6toYf3yi8ik0Vkq4jMAvK7HbtHRH4RkXUiMs01n07qGhTviF2HYpWIXCcitwL/B7zrKq1Ucr1Ne9c5f4hIY6//cVTQ0RKBCmYRwK3GmGQRWQx0M8bsEJH6wIfY6YNHA6OMMT+JSHlgAVA9zft0B04bY6q7RpeuAxCRksDLwF3GmFMiMhA7N/3rrp+LM8ZcLyKdgPeMMS1E5GtgrjFmuus9APIaY+q5RtG/BtzlrT+ICk6aCFQwm+ZKAoWAW4FprhsvQJjr8S6ghtv+IiJSyG3eeYDbgDEAxpiNIrLRtb8BUAP42fXz+YBf3H4u2u1xVCZxpk4ktxaokOWrUyqLNBGoYHbK9ZgHO2d8HQ/n5AEaGGPiL+H9BVhojOmQwXGTwfO0UuelSUb/zyov0DYCFfRcc9//JSLtwc6GKSI3uA5/D/RKPVdEPCWL5cDDruO1gNqu/b8CDUXkOtexgiJSxe3nHnJ7TC0pnAAKX/ZFKZUNmgiUsh4BnhCR34DNnF/ytDcQ6VolawvQzcPPfgQUEpGt2Pr/tQDGmEPYtYOjXdVFv3DhQiXFXPv7AM+69k0GnheR9W6NxUp5lc4+qpQDRCQGiDTG/Ot0LEppiUAppYKclgiUUirIaYlAKaWCnCYCpZQKcpoIlFIqyGkiUEqpIKeJQCmlgtz/AywtQOQuYUo/AAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(max_depths, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(max_depths, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"Tree depth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hCbNJylaB5Rk" + }, + "source": [ + "## Selected RF\n", + "From the previous we select settings for our selected RF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BtzglhO8B5Rk" + }, + "outputs": [], + "source": [ + "# Random Forest\n", + "rf = RandomForestClassifier(n_estimators=50, max_depth=25, max_features=\"sqrt\")\n", + "# Train Decision Tree Classifer\n", + "rf.fit(X_train,y_train)\n", + "\n", + "#Predict the response for test dataset\n", + "y_pred = rf.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RrQrGCzRB5Rk" + }, + "source": [ + "## Confusion matrix and classification report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "3d62e702-2ce4-4a3a-d429-bf4adf3db446", + "id": "eOr6K2_EB5Rl" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: rumination_video\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[64154 1393]\n", + " [ 1178 62553]]\n", + "Accuracy Score : 0.9801126255047262\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.98 0.98 0.98 65547\n", + " 1 0.98 0.98 0.98 63731\n", + "\n", + " accuracy 0.98 129278\n", + " macro avg 0.98 0.98 0.98 129278\n", + "weighted avg 0.98 0.98 0.98 129278\n", + "\n" + ] + } + ], + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b-J7V_EyB5Rl" + }, + "source": [ + "##Feature importances" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "444088dc-7d8e-4cbf-e616-d10553d90198", + "id": "E14e-rkyB5Rl" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Feature: 0, Score: 0.02046\n", + "Feature: 1, Score: 0.03013\n", + "Feature: 2, Score: 0.03189\n", + "Feature: 3, Score: 0.03386\n", + "Feature: 4, Score: 0.04063\n", + "Feature: 5, Score: 0.08446\n", + "Feature: 6, Score: 0.07268\n", + "Feature: 7, Score: 0.06740\n", + "Feature: 8, Score: 0.06580\n", + "Feature: 9, Score: 0.06534\n", + "Feature: 10, Score: 0.02449\n", + "Feature: 11, Score: 0.02755\n", + "Feature: 12, Score: 0.02978\n", + "Feature: 13, Score: 0.02947\n", + "Feature: 14, Score: 0.03364\n", + "Feature: 15, Score: 0.08171\n", + "Feature: 16, Score: 0.06800\n", + "Feature: 17, Score: 0.06569\n", + "Feature: 18, Score: 0.06516\n", + "Feature: 19, Score: 0.06185\n", + "Feature: 20, Score: 0.00000\n", + "Feature: 21, Score: 0.00000\n", + "Feature: 22, Score: 0.00000\n", + "Feature: 23, Score: 0.00000\n", + "Feature: 24, Score: 0.00000\n", + "Feature: 25, Score: 0.00000\n", + "Feature: 26, Score: 0.00000\n", + "Feature: 27, Score: 0.00000\n", + "Feature: 28, Score: 0.00000\n", + "Feature: 29, Score: 0.00000\n" + ] + } + ], + "source": [ + "# feature importance\n", + "model = rf\n", + "importance = model.feature_importances_\n", + "# summarize feature importance\n", + "for i,v in enumerate(importance):\n", + "\tprint('Feature: %0d, Score: %.5f' % (i,v))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kgoiE-PcuE7Q" + }, + "source": [ + "# RandomForestAlgorithm Mooing" + ] + }, + { + "cell_type": "code", + "source": [ + "BEHAVIOUR_1= 'rumination_video'\n", + "BEHAVIOUR_2= 'eating_video'\n", + "BEHAVIOUR_3= 'drinking_video'\n", + "BEHAVIOUR_4= 'sleeping_video'\n", + "BEHAVIOUR_5= 'urinating_video'\n", + "BEHAVIOUR_6= 'mooing_video'\n", + "BEHAVIOUR_7= 'resting_video'" + ], + "metadata": { + "id": "GyV1zTcSuhsq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v2Ny9xXvuE7Q" + }, + "outputs": [], + "source": [ + "BEHAVIOUR='mooing'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wQ1OzNkMuE7Q" + }, + "outputs": [], + "source": [ + "y_final = np.stack(signal_features_1[:,5], axis =0) # BEHAVIOUR_5= 'mooing_video'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CBuKdCoG9OHu" + }, + "outputs": [], + "source": [ + "# Split dataset into trainingsset (70%) and testset (30%)\n", + "X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.3, random_state=1)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "#Up and downsampling" + ], + "metadata": { + "id": "vEFxVd2O9U-N" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zHa5cLkWtNlT" + }, + "outputs": [], + "source": [ + "bool_minority_labels = y_final != 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6y1WDHvdgJ_m" + }, + "outputs": [], + "source": [ + "min_features = X_final[bool_minority_labels]\n", + "maj_features = X_final[~bool_minority_labels]\n", + "\n", + "min_labels = y_final[bool_minority_labels]\n", + "maj_labels = y_final[~bool_minority_labels]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "werDvCiBqkUe" + }, + "source": [ + "Split the minority class in 70 30 to have more in train set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q_BAylPThKC4" + }, + "outputs": [], + "source": [ + "# Split minority dataset into training set and test set\n", + "X_min_train, X_min_test, y_min_train, y_min_test = train_test_split(min_features, min_labels, test_size=0.3, random_state=1) # 70% training and 30% test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ivrXT7qZqeDf" + }, + "source": [ + "Split the majority class in 50 50 so we can sample from independant sets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zKSNgT5dlbIj" + }, + "outputs": [], + "source": [ + "# Split minority dataset into training set and test set\n", + "X_maj_train, X_maj_test, y_maj_train, y_maj_test = train_test_split(maj_features, maj_labels, test_size=0.5, random_state=1) # 50% training and 50% test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mE0TKvl9pOp1" + }, + "source": [ + "## Create train set" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "37WUaiaQqrCY" + }, + "source": [ + "Upsample the amount of minority features and labels 3 times (why 3, no idea, feels that this will create enough labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wTj2Qm65hdjf" + }, + "outputs": [], + "source": [ + "upsampled_X_min_train = np.concatenate([X_min_train, X_min_train, X_min_train], axis=0)\n", + "upsampled_y_min_train = np.concatenate([y_min_train, y_min_train, y_min_train], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tM6-s1RKq4ud" + }, + "source": [ + "Downsample from 50% of the majority class to have same amount of samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jGJqS34NVyHu", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "cba49b0a-f05c-4667-8338-6967165f9c68" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(6648, 30)" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ], + "source": [ + "ids = np.arange(len(X_maj_train))\n", + "choices = np.random.choice(ids, len(upsampled_X_min_train))\n", + "\n", + "res_maj_train_features = X_maj_train[choices]\n", + "res_maj_train_labels = y_maj_train[choices]\n", + "\n", + "res_maj_train_features.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iuBy9-Phq-zG" + }, + "source": [ + "Concat together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GcCAIqLyjawS" + }, + "outputs": [], + "source": [ + "resampled_train_features = np.concatenate([upsampled_X_min_train, res_maj_train_features], axis=0)\n", + "resampled_train_labels = np.concatenate([upsampled_y_min_train, res_maj_train_labels], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CmjdRO7drARg" + }, + "source": [ + "Shuffle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QxcQJzutpCVw" + }, + "outputs": [], + "source": [ + "order = np.arange(len(resampled_train_labels))\n", + "np.random.shuffle(order)\n", + "resampled_train_features = resampled_train_features[order]\n", + "resampled_train_labels = resampled_train_labels[order]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sClNWtnVpReY" + }, + "source": [ + "## Create test set" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5MNzyY8SrCSd" + }, + "source": [ + "We start from the X_min_test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vqxhKTzgrGZG" + }, + "source": [ + "And downsample the remaining part of the majority class which we didn't use so far" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vhBQR5fEpUB3" + }, + "outputs": [], + "source": [ + "ids = np.arange(len(X_maj_test))\n", + "choices = np.random.choice(ids, len(X_min_test))\n", + "\n", + "res_maj_test_features = X_maj_test[choices]\n", + "res_maj_test_labels = y_maj_test[choices]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pNfZ_OiyrMrA" + }, + "source": [ + "Concat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vHLqu9Ysp-Hw" + }, + "outputs": [], + "source": [ + "resampled_test_features = np.concatenate([X_min_test, res_maj_test_features], axis=0)\n", + "resampled_test_labels = np.concatenate([y_min_test, res_maj_test_labels], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gi7AxlElrNfX" + }, + "source": [ + "Shuffle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BYtInbNmp_U3" + }, + "outputs": [], + "source": [ + "order = np.arange(len(resampled_test_labels))\n", + "np.random.shuffle(order)\n", + "resampled_test_features = resampled_test_features[order]\n", + "resampled_test_labels = resampled_test_labels[order]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-ZHJ_d8-01wM" + }, + "outputs": [], + "source": [ + "X_train= resampled_train_features\n", + "y_train= resampled_train_labels\n", + "X_test= resampled_test_features\n", + "y_test= resampled_test_labels" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9_yfIsBRu54S" + }, + "source": [ + "## Test settings RandomForestAlgorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BgiPMipCu54T", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a9e63ac6-56eb-45ec-e238-f9d53e7f9ab0" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "2\n", + "4\n", + "8\n", + "16\n", + "32\n", + "64\n", + "100\n", + "200\n" + ] + } + ], + "source": [ + "n_estimators = [1, 2, 4, 8, 16, 32, 64, 100, 200]\n", + "train_results = []\n", + "test_results = []\n", + "for estimator in n_estimators:\n", + " print(estimator)\n", + " rf = RandomForestClassifier(n_estimators=estimator, max_features=\"sqrt\")\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A1ZAev21u54U", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 280 + }, + "outputId": "c64e1c5d-2e19-4af9-a2be-12754091c535" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(n_estimators, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(n_estimators, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"n_estimators\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rmBGF-zRu54U" + }, + "source": [ + "### Test max depth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jSC3S0t3u54V", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "17c9b88f-2079-4b35-9492-3e609871b5df" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.0\n", + "2.0\n", + "3.0\n", + "4.0\n", + "5.0\n", + "6.0\n", + "7.0\n", + "8.0\n", + "9.0\n", + "10.0\n", + "11.0\n", + "12.0\n", + "13.0\n", + "14.0\n", + "15.0\n", + "16.0\n", + "17.0\n", + "18.0\n", + "19.0\n", + "20.0\n", + "21.0\n", + "22.0\n", + "23.0\n", + "24.0\n", + "25.0\n", + "26.0\n", + "27.0\n", + "28.0\n", + "29.0\n", + "30.0\n", + "31.0\n", + "32.0\n" + ] + } + ], + "source": [ + "max_depths = np.linspace(1, 32, 32, endpoint=True)\n", + "train_results = []\n", + "test_results = []\n", + "for max_depth in max_depths:\n", + " print(max_depth)\n", + " rf = RandomForestClassifier(max_depth=max_depth, max_features=\"sqrt\", n_estimators = 50)\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4ie_VEPKu54V", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "outputId": "8caa096d-0e58-4feb-f78d-4ca6b8e2e48e" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(max_depths, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(max_depths, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"Tree depth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8kouoCd3u54V" + }, + "source": [ + "## Selected RF\n", + "From the previous we select settings for our selected RF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pEI4JKIBu54V" + }, + "outputs": [], + "source": [ + "# Random Forest\n", + "rf = RandomForestClassifier(n_estimators=25, max_depth=20, max_features=\"sqrt\")\n", + "# Train Decision Tree Classifer\n", + "rf.fit(X_train,y_train)\n", + "\n", + "#Predict the response for test dataset\n", + "y_pred = rf.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4yXXuuZUu54V" + }, + "source": [ + "## Confusion matrix and classification report" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## without resampling" + ], + "metadata": { + "id": "shpeA4Qg9jQf" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x79LUgYS-Enl" + }, + "source": [ + "## Test settings RandomForestAlgorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "fe5df753-f49d-438f-f6fe-3ee7cb055dc0", + "id": "CtqGerXL-Enl" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "2\n", + "4\n", + "8\n", + "16\n", + "32\n", + "64\n", + "100\n", + "200\n" + ] + } + ], + "source": [ + "n_estimators = [1, 2, 4, 8, 16, 32, 64, 100, 200]\n", + "train_results = []\n", + "test_results = []\n", + "for estimator in n_estimators:\n", + " print(estimator)\n", + " rf = RandomForestClassifier(n_estimators=estimator, max_features=\"sqrt\")\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 280 + }, + "outputId": "4f12538a-d009-42b0-bd49-e04675df0aaa", + "id": "eOU5jGo6-Enm" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEHCAYAAAC0pdErAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de5xVdb3/8debAWdKQBSICERGf6TiJdRRM9HAK6moSRpW52CnRx79eck6nvJSavSwtCwrM01/GVEKmkZxlMQLkB4xZVBUvIKIMmiEXARUlMvn98daGzbDnmHPMGvvDfN+Ph77sdf6rrX2+szaM+sz3+93re9SRGBmZtZYh3IHYGZmlckJwszMCnKCMDOzgpwgzMysICcIMzMrqGO5A2grPXr0iP79+5c7DDOzbcrMmTPfjoiehZZtNwmif//+1NfXlzsMM7NtiqTXm1rmJiYzMyvICcLMzApygjAzs4KcIMzMrCAnCDMzKyizBCHpNkn/kjS7ieWS9EtJcyU9K+nAvGWjJM1JX6OyitHMzJqWZQ1iDDCsmeWfAwakr7OBmwAk7QJcCRwKHAJcKWnnDOM0M7MCMrsPIiIekdS/mVVOAcZGMt74PyR1k9QbGAI8GBFLASQ9SJJoxmUVq5VWBKxZA+++C++9l7zWrCl3VNnJjajf+L3YMq/v9be0rFcvGDGCNlfOG+X6AAvy5hvSsqbKNyPpbJLaB/369csmynZo3brkpJ1/As9NFyprzfJ168r9U5ptPw49dPtLEFstIm4BbgGoq6trl08+Wr4cXn21+JN1MSfwDz9seRw1NfDRj8KOO276vtNO8IlPJNOFluemO3Vq+2NTSaTC78WWeX2v39yyjhmdycuZIBYCu+bN903LFpI0M+WXTytZVBVu3Tqor4fJk+H+++GJJ2D9+ua36dix6RNz9+6bl23pZN647KMfhQ6+Hs5su1POBDEROF/SeJIO6Xci4i1Jk4Ef5nVMHwdcWq4gK8GbbyYJYfJkePBBWLo0+a+hrg4uuyx579y56ZP59v7fuZllI7MEIWkcSU2gh6QGkiuTOgFExM3AJOAEYC7wHvDVdNlSST8AZqQfNTrXYd1efPABPProxqTw3HNJ+cc/DsOHw7BhcMwx0KNHeeM0s+2bIraPpvu6urrYVkdzjYA5c5Imo8mTYdq0pD9ghx1g8GA4/vgkKey336Ztj2ZmW0vSzIioK7Rsm+6k3patWAEPP7yxljB/flI+YAD8x38kSWHIkKTpyMysHJwgSmT9enj66Y21hMcfh7VrkwRw9NHw7W8nSWH33csdqZlZwgkiQxFw993wl78kncuLFyflBx4I//3fSUI47LCkKcnMrNI4QWTohhvgG9+Anj2TZHD88XDsscldj2Zmlc4JIiOPPQb/9V9w8skwYYLvEzCzbY9PWxlYtAjOOAN22w1+/3snBzPbNrkG0cbWroWRI2HZMvjb36Bbt3JHZGbWOk4Qbeyyy5L7GMaOhf33L3c0Zmat58aPNvTnP8NPfgLnngv/9m/ljsbMbOs4QbSRl1+Gs86CQw6B668vdzRmZlvPCaINvPtuMhZ7dXVy30N1dbkjMjPbeu6D2EoR8PWvw4svJndI77rrlrcxM9sWOEFspV/9CsaNg6uvTkZYNTPbXriJaStMnw7f+lYyBPcll5Q7GjOztuUE0UqLFsHpp0O/fsklrb4Zzsy2N25iaoXczXBLlyajsvpmODPbHjlBtMLllyc3w40ZA4MGlTsaM7NsuGGkhSZMgB//GP7zP2HUqHJHY2aWHSeIFnjttSQpHHww/OIX5Y7GzCxbmSYIScMkvSxprqTNrvORtJukhyU9K2mapL55y9ZJmpW+JmYZZ7HGjoVVq+Cuu3wznJlt/zLrg5BUBdwIHAs0ADMkTYyIF/JWuw4YGxG/l3QU8CMgN4rR+xFRUS38U6YkT4Pr37/ckZiZZS/LGsQhwNyImBcRHwLjgVMarTMQmJJOTy2wvGK8915yxdJRR5U7EjOz0sgyQfQBFuTNN6Rl+Z4BTkunPw90kdQ9na+RVC/pH5JOLbQDSWen69Qvzj3wOSPTp8OaNU4QZtZ+lLuT+mLgs5KeBj4LLATWpct2i4g64EvAzyXt0XjjiLglIuoioq5nz56ZBjplCnTsCIMHZ7obM7OKkeV9EAuB/KHr+qZlG0TEm6Q1CEmdgRERsTxdtjB9nydpGnAA8GqG8TZrypRkKO/OncsVgZlZaWVZg5gBDJBUK2kHYCSwydVIknpIysVwKXBbWr6zpOrcOsDhQH7ndkm98w7MmOHmJTNrXzJLEBGxFjgfmAy8CNwVEc9LGi3p5HS1IcDLkl4BegFXp+V7A/WSniHpvL6m0dVPJfXoo7B+vROEmbUvmQ61ERGTgEmNyq7Im74buLvAdtOB/bKMrSWmTk3uezjssHJHYmZWOuXupN4mTJkChx8ONTXljsTMrHScILZgyRKYNQuGDi13JGZmpeUEsQXTpiXv7n8ws/bGCWILpkyBHXdMBugzM2tPnCC2YOpUOPJI6NSp3JGYmZWWE0Qz3noLXnzRzUtm1j45QTRj6tTk3R3UZtYeOUE0Y8qU5HnTfqyombVHThDNmDoVhgyBqqpyR2JmVnpOEE2YPx/mzXP/g5m1X04QTXD/g5m1d04QTZgyBXr2hH32KXckZmbl4QRRQESSII46CqRyR2NmVh5OEAXMmQNvvun+BzNr35wgCsj1PzhBmFl75gRRwNy5yfMf9tjsKdhmZu2HE0QBS5ZAjx7ufzCz9s0JooAlS6B793JHYWZWXpkmCEnDJL0saa6kSwos303Sw5KelTRNUt+8ZaMkzUlfo7KMs7G3305qEGZm7VlmCUJSFXAj8DlgIHCmpIGNVrsOGBsR+wOjgR+l2+4CXAkcChwCXClp56xibcw1CDOzbGsQhwBzI2JeRHwIjAdOabTOQGBKOj01b/nxwIMRsTQilgEPAsMyjHUTThBmZtkmiD7Agrz5hrQs3zPAaen054EukroXuW0m1q+HpUvdxGRmVu5O6ouBz0p6GvgssBBYV+zGks6WVC+pfvHixa0KYOlSOO00+Nvfkvnly5Mk4RqEmbV3WSaIhcCuefN907INIuLNiDgtIg4ALk/LlhezbbruLRFRFxF1PXv2bFWQEkyYAC+/nMwvWZK8O0GYWXuXZYKYAQyQVCtpB2AkMDF/BUk9JOViuBS4LZ2eDBwnaee0c/q4tKzNde6cvK9alby//Xby7iYmM2vvMksQEbEWOJ/kxP4icFdEPC9ptKST09WGAC9LegXoBVydbrsU+AFJkpkBjE7L2lynTsld07kE4RqEmVmiY5YfHhGTgEmNyq7Im74buLuJbW9jY40iU507w8qVybQThJlZotyd1BWhc+fNaxBuYjKz9s4Jgk0TxNtvQ8eO0LVreWMyMys3Jwg2r0HssosH6jMzc4Jg8wTh5iUzMycIYPMmJndQm5k5QQCb1yCcIMzMnCAAJwgzs0KcINiYICL8LAgzsxwnCJIE8d57sGIFrFnjGoSZGThBANClS/L+xhvJuxOEmZkTBLBxwL7XX0/e3cRkZuYEAWxMEPPnJ++uQZiZOUEAm9cgnCDMzJwggM1rEG5iMjMrMkFIGizpq+l0T0m12YZVWvkJQoKddy5rOGZmFWGLCULSlcB3SJ74BtAJ+GOWQZVafhNTt25QVVXeeMzMKkExNYjPAycD70LyHGmgS5ZBlVouQSxe7OYlM7OcYhLEhxERQABI2jHbkEovlyDAHdRmZjnFJIi7JP0G6Cbp68BDwK3ZhlVaThBmZptrNkFIEnAnyXOj7wH2BK6IiBuK+XBJwyS9LGmupEsKLO8naaqkpyU9K+mEtLy/pPclzUpfN7f4J2uBHXZIniIHbmIyM8vp2NzCiAhJkyJiP+DBlnywpCrgRuBYoAGYIWliRLyQt9p3gbsi4iZJA4FJQP902asRMagl+2wtKRluY9ky1yDMzHKKaWJ6StLBrfjsQ4C5ETEvIj4ExgOnNFongNzTn3cC3mzFftpErpnJCcLMLFFMgjgUeFzSq2kz0HOSni1iuz7Agrz5hrQs31XAVyQ1kNQeLshbVps2Pf1d0hGFdiDpbEn1kuoXL15cREhNyyUINzGZmSWabWJKHZ/h/s8ExkTETyUdBvxB0r7AW0C/iFgi6SDgL5L2iYgV+RtHxC3ALQB1dXWxNYG4BmFmtqkt1iAi4nWgGzA8fXVLy7ZkIbBr3nzftCzf14C70v08DtQAPSLig4hYkpbPBF4FPlnEPlvNCcLMbFPF3En9DeB24GPp64+SLmh+KwBmAAMk1UraARgJTGy0zhvA0el+9iZJEIvT4Tyq0vLdgQHAvOJ+pNZxE5OZ2aaKaWL6GnBoRLwLIOla4HGg2UtdI2KtpPOByUAVcFtEPC9pNFAfEROB/wJulfRNkg7rs9Irp44ERktaA6wHzomIpa38GYviGoSZ2aaKSRAC1uXNr0vLtigiJpF0PueXXZE3/QJweIHt7iG576JknCDMzDZVTIL4HfCEpAnp/KnAb7MLqTz69YO+fZOb5szMDJQMs7SFlaQDgcHp7KMR8XSmUbVCXV1d1NfXt3r7Dz6AlSvdB2Fm7YukmRFRV2jZFmsQkj4NPB8RT6XzXSUdGhFPtHGcZVVdnbzMzCxRzI1yNwGr8uZXpWVmZrYdKyZBKPLaoSJiPcX1XZiZ2TasmAQxT9KFkjqlr2+Q8T0JZmZWfsUkiHOAz5DcBd1AMjbT2VkGZWZm5bfFpqKI+BfJXdBmZtaOFDPUxo/TK5c6SXpY0mJJXylFcGZmVj7FNDEdl46iehIwH/g/wH9nGZSZmZVfMQki1wx1IvCniHgnw3jMzKxCFHO56r2SXgLeB86V1BNYnW1YZmZWbsU8D+ISkquY6iJiDfAemz861MzMtjNF3fCWP9R2Ouz3u5lFZGZmFaGYPggzM2uHnCDMzKygJhOEpOMlfaFA+RckHZttWGZmVm7N1SCuAP5eoHwaMDqTaMzMrGI0lyCqI2Jx48KIeBvYMbuQzMysEjSXILpK2uwqJ0mdgI8U8+GShkl6WdJcSZcUWN5P0lRJT0t6VtIJecsuTbd7WdLxxezPzMzaTnMJ4s/ArZI21BYkdQZuTpc1S1IVcCPwOWAgcKakgY1W+y5wV0QcQDIg4K/TbQem8/sAw4Bfp59nZmYl0lyC+C6wCHhd0kxJTwGvAYvTZVtyCDA3IuZFxIfAeDa/wS6Arun0TsCb6fQpwPiI+CAiXgPmpp9nZmYl0uSNchGxFrhE0vdJBuiD5IT/fpGf3QdYkDefe5ZEvquAByRdQNKvcUzetv9otG2fxjuQdDbpsyn69etXZFhmZlaMJhOEpNMaFQXQTdKsiFjZRvs/ExgTET+VdBjwB0n7FrtxRNwC3AJQV1cXW1jdzMxaoLmhNoYXKNsF2F/S1yJiyhY+eyGwa95837Qs39dI+hiIiMcl1QA9itzWzMwy1FwT01cLlUvaDbiLzZuLGpsBDJBUS3JyHwl8qdE6bwBHA2Mk7Q3UkPRxTATukPQz4BPAAODJLf40ZmbWZooarC9fRLyeXuq6pfXWSjofmAxUAbdFxPOSRgP1ETER+C+SK6W+SdKEdVZEBPC8pLuAF4C1wHkRsa6lsZqZWespOR+3YANpT5J+g8OyCal16urqor6+vtxhmJltUyTNjIi6Qsua66T+H5L/6vPtAvQG/q3twjMzs0rUXBPTdY3mA1gCzEnvazAzs+1Yc53UhQbqQ9JgSWdGxHnZhWVmZuVWVCe1pANIrkA6neRu6i0OtWFmZtu25vogPklyI9uZwNvAnSSd2kNLFJuZmZVRczWIl4BHgZMiYi5AejmqmZm1A80N1nca8BYwVdKtko4GVJqwzMys3JpMEBHxl4gYCewFTAUuAj4m6SZJx5UqQDMzK4/mahAARMS7EXFHRAwnGRPpaeA7mUdmZmZltcUEkS8ilkXELRFxdFYBmZlZZWhRgjAzs/bDCcLMzApygjAzs4KcIMzMrCAnCDMzK8gJwszMCnKCMDOzgpwgzMysoEwThKRhkl6WNFfSJQWWXy9pVvp6RdLyvGXr8pZNzDJOMzPbXFHPg2gNSVXAjcCxQAMwQ9LEiHght05EfDNv/QuAA/I+4v2IGJRVfGZm1rwsaxCHAHMjYl76iNLxwCnNrH8mMC7DeMzMrAWyTBB9gAV58w1p2WYk7QbUAlPyimsk1Uv6h6RTswvTzMwKyayJqYVGAndHxLq8st0iYqGk3YEpkp6LiFfzN5J0NnA2QL9+/UoXrZlZO5BlDWIhsGvefN+0rJCRNGpeioiF6fs8YBqb9k/k1rklIuoioq5nz56ti3LNGqivh0WLWre9mdl2KssEMQMYIKlW0g4kSWCzq5Ek7QXsDDyeV7azpOp0ugdwOPBC423bxLJlcPDBcPfdmXy8mdm2KrMmpohYK+l8YDJQBdwWEc9LGg3UR0QuWYwExkdE5G2+N/AbSetJktg1+Vc/tanq6uT9gw8y+Xgzs21Vpn0QETEJmNSo7IpG81cV2G46sF+WsW3gBGFmVpDvpN5hh+TdCcLMbBNOEB06QKdOThBmZo04QUBSi3CCMDPbhBMEJP0QThBmZptwgoAkQXz4YbmjMDOrKE4Q4BqEmVkBlTLURnltqwni/ffhkUdg/nyoqdn4qq4ubr6D/z8ws6Y5QcC2kyAi4IUX4P77YfLkJDlsTdydOrUsobT1fHU1VFW1/lhEwPr1zb8Xs05L1m3N50Hyc7bly8ndSsAJAio7QSxdCg89lCSEyZNhYTqc1d57w7nnwvHHw777Jn0oq1cnP8fq1RtfLZlvvGz1ali+vOnla9du/c/XsWOSLDp2bNmJ1zYmirZOPsUmqO1xvx06gFTub7ZiOEFAZSWItWthxoyNtYQZM5ITY7ducMwxSUI47jiohNFr161rPsEUuyyXbDp02PgH2tR7c8uyXre1n5c7Vlm81q9v289bsyb5Ptpyv9taQi+UhCo9Ie66K/z7v7f5oXCCgCRBvP9++fa/YMHGGsJDDyX/tXfokAwi+N3vwrBhyXTHCvu6qqrgox9NXmZNiajchFjK/X74YdvuO9d8CfDpTztBZKa6Ojkpl0quc3ny5KSm8OKLSXmfPnDaaUkt4ZhjYJddSheTWVak5J+bSvsHZ1uXa3bNsJbmbwyyb2LKdS7nagmPPJJU46ur4cgj4WtfS5LCPvu4/dPMiiNtbGLKiBMEZJcg3n4bvvc9uPdeaGhIyvbaC845J0kIRx7p5hkzq1hOEJBNgpgxA77wBfjnP2H4cLjiiiQpVELnsplZEZwgoG0TRATcfDNcdBH07g2PPQZ1dW3z2WZmJeS7baDtEsS77yZXEvzf/wtHHw0zZzo5mNk2ywkC2iZBvPIKHHoo3H47jB6d9Dt079428ZmZlYGbmCB5HsTWjOZ6zz3w1a8mnzN5Mhx7bNvFZmZWJpnWICQNk/SypLmSLimw/HpJs9LXK5KW5y0bJWlO+hqVZZxUV2+8+aQl1qyBiy9OOqMHDoSnnnJyMLPtRmY1CElVwI3AsUADMEPSxIh4IbdORHwzb/0LgAPS6V2AK4E6IICZ6bbLMgm2ujp5/+CD4i87ffNN+OIX4X//F84/H376043PtzYz2w5kWYM4BJgbEfMi4kNgPHBKM+ufCYxLp48HHoyIpWlSeBAYllmk+QmiGH//Oxx4YFJjuOMOuOEGJwcz2+5kmSD6AAvy5hvSss1I2g2oBaa0ZFtJZ0uql1S/ePHi1kdabIKIgB//OLlCqVs3ePJJOPPM1u/XzKyCVcpVTCOBuyOiRZ0AEXFLRNRFRF3Pnj1bv/diE8Stt8J3vgMjRiQ3wu2zT+v3aWZW4bJMEAuBXfPm+6ZlhYxkY/NSS7fdesUkiPXr4brrkvsaxo+HLl0yC8fMrBJkmSBmAAMk1UragSQJTGy8kqS9gJ2Bx/OKJwPHSdpZ0s7AcWlZNopJEJMmwZw58M1vekA9M2sXMruKKSLWSjqf5MReBdwWEc9LGg3UR0QuWYwExkdsHK82IpZK+gFJkgEYHRFLs4q1qARx/fXJcNynn55ZGGa2qTVr1tDQ0MDq1avLHco2r6amhr59+9KpU6eit8n0RrmImARMalR2RaP5q5rY9jbgtsyCy7elBPHMMzBlCvzoR8lznM2sJBoaGujSpQv9+/dHrrm3WkSwZMkSGhoaqK2tLXq7SumkLq8tJYhf/CK5P+Lss0sXk5mxevVqunfv7uSwlSTRvXv3FtfEnCCg+QSxaFEyvtKoUX7Cm1kZODm0jdYcRycIaD5B3HRTMk7TRReVNiYzszJzgoCmE8Tq1fDrX8NJJ8EnP1n6uMysrJYsWcKgQYMYNGgQH//4x+nTp8+G+Q+3MMBnfX09F154YYv3OWvWLCRx//33byibP38+++677ybrXXXVVVx33XUb5q+77jr22msvBg0axMEHH8zYsWNbvO/GPJorbEwQjb/wO+6AxYuTS1vNrN3p3r07s2bNApITcufOnbn44os3LF+7di0dOxY+jdbV1VHXiufBjBs3jsGDBzNu3DiGDStuhKGbb76ZBx98kCeffJKuXbuyYsUKJkyY0OJ9N+YEAYVrEBHw85/D/vvD0KHlicvMCrroIkjP2y02aFDyp91aZ511FjU1NTz99NMcfvjhjBw5km984xusXr2aj3zkI/zud79jzz33ZNq0aVx33XXce++9XHXVVbzxxhvMmzePN954g4suuqhg7SIi+NOf/sSDDz7IEUccwerVq6mpqdliTD/84Q+ZNm0aXbt2BaBr166MGrX1g2A7QUDhBPHww/Dcc/C73/nGODPbRENDA9OnT6eqqooVK1bw6KOP0rFjRx566CEuu+wy7rnnns22eemll5g6dSorV65kzz335Nxzz93snoTp06dTW1vLHnvswZAhQ7jvvvsYMWJEs7GsWLGClStXsvvuu7fpzwhOEIncSKz5CeL666FXLw/GZ1aBtqYG0BZOP/10qqqqAHjnnXcYNWoUc+bMQRJr1qwpuM2JJ55IdXU11dXVfOxjH2PRokX07dt3k3XGjRvHyJEjARg5ciRjx45lxIgRTV6BlPUVXk4QsHkN4qWXkqE1vv/9jcvMzFI77rjjhunvfe97DB06lAkTJjB//nyGDBlScJvqvHNJVVUVa9eu3WT5unXruOeee/jrX//K1VdfveHmtpUrV9K9e3eWLdv0cThLly6ltraWrl270rlzZ+bNm9fmtQhfxQSb1yBuvx2qquCcc8oXk5ltE9555x369EmeRjBmzJhWf87DDz/M/vvvz4IFC5g/fz6vv/46I0aMYMKECXTu3JnevXszZUryRISlS5dy//33M3jwYAAuvfRSzjvvPFasWAHAqlWr2uQqJicIgA4dkiE0cgni6adh773hYx8rb1xmVvG+/e1vc+mll3LAAQdsVitoiXHjxvH5z39+k7IRI0Ywblwy0PXYsWP5wQ9+wKBBgzjqqKO48sor2WOPPQA499xzGTp0KAcffDD77rsvRxxxBB06bP3pXXlj5G3T6urqor6+vvUf0KULfP3r8LOfwa67wpAh8Ic/tFl8ZtZyL774InvvvXe5w9huFDqekmZGRMHrcV2DyKmuTmoQS5ZAQwN86lPljsjMrKycIHJyCeKZZ5J5Jwgza+ecIHKcIMzMNuEEkZNLELNmQe/e7qA2s3bP90Hk5BLEK6+49mBmhmsQG1VXw6pV8MILThBmZjhBbFRdnfQ/rFmTjOZlZu3e1gz3DTBt2jSmT5/e7Dqnnnoqn/70pzcpO+uss7j77rs3KevcufOG6VdeeYUTTjiBAQMGcOCBB3LGGWewaNGiFvxkxcm0iUnSMOAXQBXw/yLimgLrnAFcBQTwTER8KS1fBzyXrvZGRJycZaxUVyeXuIJrEGYGbHm47y2ZNm0anTt35jOf+UzB5cuXL2fmzJktGipj9erVnHjiifzsZz9j+PDhG/azePFievXqVXRsxcgsQUiqAm4EjgUagBmSJkbEC3nrDAAuBQ6PiGWS8nuG34+I0v0rnxsnpaYGBgwo2W7NrEhbM8Z3U1ox9vfMmTP51re+xapVq+jRowdjxoyhd+/e/PKXv+Tmm2+mY8eODBw4kGuuuYabb76Zqqoq/vjHP3LDDTdwxBFHbPJZf/7znxk+fDi9evVi/PjxXHbZZVvc/x133MFhhx22ITkATY7/tLWyrEEcAsyNiHkAksYDpwAv5K3zdeDGiFgGEBH/yjCe5uUSxH77QRMPADGz9i0iuOCCC/jrX/9Kz549ufPOO7n88su57bbbuOaaa3jttdeorq5m+fLldOvWjXPOOafZWse4ceO44oor6NWrFyNGjCgqQcyePZuDDjqorX+0grI8E/YBFuTNNwCHNlrnkwCSHiNphroqInLP2auRVA+sBa6JiL803oGks4GzAfr167d10eYShJuXzCpTucf4Bj744ANmz57NscceCyQjsPbu3RuA/fffny9/+cuceuqpnHrqqVv8rEWLFjFnzhwGDx6MJDp16sTs2bPZd999Cw7jnfXQ3oWUu5O6IzAAGAKcCdwqqVu6bLd0fJAvAT+XtEfjjSPiloioi4i6nj17bl0kuRFdnSDMrAkRwT777MOsWbOYNWsWzz33HA888AAA9913H+eddx5PPfUUBx988BYH7rvrrrtYtmwZtbW19O/fn/nz528YmK/x8N5Lly6lR48eAOyzzz7MnDkzo59wU1kmiIXArnnzfdOyfA3AxIhYExGvAa+QJAwiYmH6Pg+YBhyQYayuQZjZFlVXV7N48WIef/xxANasWcPzzz/P+vXrWbBgAUOHDuXaa6/lnXfeYdWqVXTp0oWVK1cW/Kxx48Zx//33M3/+fObPn8/MmTMZP348kPQp3HnnnRuulBozZgxD00cff+lLX2L69Oncd999Gz7rkUceYfbs2W3+82aZIGYAAyTVStoBGAlMbLTOX0hqD0jqQdLkNE/SzpKq88oPZ9O+i7aXSxD775/pbsxs29WhQwfuvvtuvvOd7/CpT32KQYMGMX36dNatW8dXvvIV9ttvPw444AAuvPBCunXrxsTqJIAAAAi1SURBVPDhw5kwYQKDBg3i0Ucf3fA5uec95F/eWltby0477cQTTzzBSSedxBFHHMFBBx3EoEGDeOyxx7j22msB+MhHPsK9997LDTfcwIABAxg4cCC//vWv2epWlAIyHe5b0gnAz0n6F26LiKsljQbqI2Kikka1nwLDgHXA1RExXtJngN8A60mS2M8j4rfN7Wurh/ueOhUeeAB+9KPWf4aZtSkP9922Wjrcd6aX60TEJGBSo7Ir8qYD+Fb6yl9nOrBflrFtZujQ5GVmZkD5O6nNzKxCOUGYWUXbXp56WW6tOY5OEGZWsWpqaliyZImTxFaKCJYsWUJNTU2LtvMtw2ZWsfr27UtDQwOLFy8udyjbvJqaGvr27duibZwgzKxiderUidra2nKH0W65icnMzApygjAzs4KcIMzMrKBM76QuJUmLgddbsWkP4O02DqctVGpcULmxOa6WqdS4oHJj2x7j2i0iCo7Tsd0kiNaSVN/UbeblVKlxQeXG5rhaplLjgsqNrb3F5SYmMzMryAnCzMwKcoKAW8odQBMqNS6o3NgcV8tUalxQubG1q7jafR+EmZkV5hqEmZkV5ARhZmYFtesEIWmYpJclzZV0SRnj2FXSVEkvSHpe0jfS8qskLZQ0K32dUIbY5kt6Lt1/fVq2i6QHJc1J33cucUx75h2TWZJWSLqoXMdL0m2S/iVpdl5ZwWOkxC/T37lnJR1Y4rh+IumldN8TJHVLy/tLej/v2N1c4ria/O4kXZoer5clHV/iuO7Mi2m+pFlpeSmPV1Pnh+x/xyKiXb5IHoP6KrA7sAPwDDCwTLH0Bg5Mp7sArwADgauAi8t8nOYDPRqV/Ri4JJ2+BLi2zN/jP4HdynW8gCOBA4HZWzpGwAnA3wABnwaeKHFcxwEd0+lr8+Lqn79eGY5Xwe8u/Tt4BqgGatO/2apSxdVo+U+BK8pwvJo6P2T+O9aeaxCHAHMjYl5EfAiMB04pRyAR8VZEPJVOrwReBPqUI5YinQL8Pp3+PXBqGWM5Gng1IlpzF32biIhHgKWNips6RqcAYyPxD6CbpN6liisiHoiItensP4CWjf+cUVzNOAUYHxEfRMRrwFySv92SxiVJwBnAuCz23Zxmzg+Z/4615wTRB1iQN99ABZyUJfUHDgCeSIvOT6uJt5W6KScVwAOSZko6Oy3rFRFvpdP/BHqVIa6ckWz6R1vu45XT1DGqpN+7/yD5TzOnVtLTkv4u6YgyxFPou6uU43UEsCgi5uSVlfx4NTo/ZP471p4TRMWR1Bm4B7goIlYANwF7AIOAt0iquKU2OCIOBD4HnCfpyPyFkdRpy3KttKQdgJOBP6VFlXC8NlPOY9QUSZcDa4Hb06K3gH4RcQDwLeAOSV1LGFJFfnd5zmTTf0RKfrwKnB82yOp3rD0niIXArnnzfdOyspDUieTLvz0i/gwQEYsiYl1ErAduJaOqdXMiYmH6/i9gQhrDolyVNX3/V6njSn0OeCoiFqUxlv145WnqGJX9907SWcBJwJfTEwtpE86SdHomSVv/J0sVUzPfXSUcr47AacCdubJSH69C5wdK8DvWnhPEDGCApNr0P9GRwMRyBJK2b/4WeDEifpZXnt9u+HlgduNtM45rR0ldctMkHZyzSY7TqHS1UcBfSxlXnk3+qyv38WqkqWM0Efj39EqTTwPv5DUTZE7SMODbwMkR8V5eeU9JVen07sAAYF4J42rqu5sIjJRULak2jevJUsWVOgZ4KSIacgWlPF5NnR8oxe9YKXrhK/VF0tv/Ckn2v7yMcQwmqR4+C8xKXycAfwCeS8snAr1LHNfuJFeQPAM8nztGQHfgYWAO8BCwSxmO2Y7AEmCnvLKyHC+SJPUWsIakvfdrTR0jkitLbkx/554D6koc11yS9unc79nN6boj0u94FvAUMLzEcTX53QGXp8frZeBzpYwrLR8DnNNo3VIer6bOD5n/jnmoDTMzK6g9NzGZmVkznCDMzKwgJwgzMyvICcLMzApygjAzs4KcIMzMrCAnCLOtIGlQo6GpT1YbDR2vZAjzj7bFZ5m1hu+DMNsK6bAVdRFxfgafPT/97LdbsE1VRKxr61isfXINwtqF9AEvL0q6NX3oygOSPtLEuntIuj8dwfZRSXul5adLmi3pGUmPpEO0jAa+mD405ouSzpL0q3T9MZJukvQPSfMkDUlHKn1R0pi8/d0kqT6N6/tp2YXAJ4CpkqamZWcqeXjTbEnX5m2/StJPJT0DHCbpGiUPl3lW0nXZHFFrF7K6PdwvvyrpRfKAl7XAoHT+LuArTaz7MDAgnT4UmJJOPwf0Sae7pe9nAb/K23bDPMkQDeNJhj44BVgB7Efyj9nMvFhyQyRUAdOA/dP5+aQPayJJFm8APYGOwBTg1HRZAGek091JhqRQfpx++dWal2sQ1p68FhGz0umZJEljE+mQyp8B/qTk8ZK/IXmiF8BjwBhJXyc5mRfjfyIiSJLLooh4LpIRS5/P2/8Zkp4Cngb2IXlaWGMHA9MiYnEkD/y5neQJaADrSEb6BHgHWA38VtJpwHubfZJZkTqWOwCzEvogb3odUKiJqQOwPCIGNV4QEedIOhQ4EZgp6aAW7HN9o/2vBzqmI5ReDBwcEcvSpqeaIj433+pI+x0iYq2kQ0ietPcF4HzgqBZ+nhngPgizTUTyIJbXJJ0OGx4A/6l0eo+IeCIirgAWk4y5v5LkOcGt1RV4F3hHUi+SZ1zk5H/2k8BnJfVIh5k+E/h74w9La0A7RcQk4JvAp7YiNmvnXIMw29yXgZskfRfoRNKP8AzwE0kDSPoUHk7L3gAuSZujftTSHUXEM5KeBl4iGYb7sbzFtwD3S3ozIoaml89OTfd/X0QUeg5HF+CvkmrS9b7V0pjMcnyZq5mZFeQmJjMzK8hNTNZuSboROLxR8S8i4nfliMes0riJyczMCnITk5mZFeQEYWZmBTlBmJlZQU4QZmZW0P8HXP8YPRHkYv8AAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(n_estimators, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(n_estimators, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"n_estimators\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vAMnQXIy-Enm" + }, + "source": [ + "### Test max depth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a4a7c998-984c-4233-dc94-29781d71e4a6", + "id": "D5k-zr_3-Enm" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.0\n", + "2.0\n", + "3.0\n", + "4.0\n", + "5.0\n", + "6.0\n", + "7.0\n", + "8.0\n", + "9.0\n", + "10.0\n", + "11.0\n", + "12.0\n", + "13.0\n", + "14.0\n", + "15.0\n", + "16.0\n", + "17.0\n", + "18.0\n", + "19.0\n", + "20.0\n", + "21.0\n", + "22.0\n", + "23.0\n", + "24.0\n", + "25.0\n", + "26.0\n", + "27.0\n", + "28.0\n", + "29.0\n", + "30.0\n", + "31.0\n", + "32.0\n" + ] + } + ], + "source": [ + "max_depths = np.linspace(1, 32, 32, endpoint=True)\n", + "train_results = []\n", + "test_results = []\n", + "for max_depth in max_depths:\n", + " print(max_depth)\n", + " rf = RandomForestClassifier(max_depth=max_depth, max_features=\"sqrt\", n_estimators = 50)\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "outputId": "5f1e7165-f595-4b2f-88c7-5bae36cf559f", + "id": "RJQxwK3a-Enm" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(max_depths, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(max_depths, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"Tree depth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "r2BFqioH9g7m", + "outputId": "14bc05c9-9ffa-4488-a00c-319ac9672def" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: mooing\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[128327 15]\n", + " [ 778 158]]\n", + "Accuracy Score : 0.9938659323318739\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 1.00 128342\n", + " 1 0.91 0.17 0.28 936\n", + "\n", + " accuracy 0.99 129278\n", + " macro avg 0.95 0.58 0.64 129278\n", + "weighted avg 0.99 0.99 0.99 129278\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## with resaampling" + ], + "metadata": { + "id": "uHRZyGAV9v6F" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yIS1sqfJu54W", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "8c6c0b38-1280-47f5-c324-397295580d1c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: mooing\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[909 41]\n", + " [ 73 877]]\n", + "Accuracy Score : 0.94\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.93 0.96 0.94 950\n", + " 1 0.96 0.92 0.94 950\n", + "\n", + " accuracy 0.94 1900\n", + " macro avg 0.94 0.94 0.94 1900\n", + "weighted avg 0.94 0.94 0.94 1900\n", + "\n" + ] + } + ], + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z5IVVh1pu54W", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9c885051-7f9a-4e66-9960-01075e9ed535" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Feature: 0, Score: 0.01592\n", + "Feature: 1, Score: 0.02538\n", + "Feature: 2, Score: 0.02926\n", + "Feature: 3, Score: 0.03384\n", + "Feature: 4, Score: 0.03788\n", + "Feature: 5, Score: 0.08007\n", + "Feature: 6, Score: 0.06814\n", + "Feature: 7, Score: 0.06643\n", + "Feature: 8, Score: 0.06666\n", + "Feature: 9, Score: 0.06630\n", + "Feature: 10, Score: 0.01729\n", + "Feature: 11, Score: 0.02487\n", + "Feature: 12, Score: 0.02559\n", + "Feature: 13, Score: 0.03034\n", + "Feature: 14, Score: 0.03188\n", + "Feature: 15, Score: 0.07919\n", + "Feature: 16, Score: 0.06715\n", + "Feature: 17, Score: 0.06396\n", + "Feature: 18, Score: 0.07723\n", + "Feature: 19, Score: 0.09262\n", + "Feature: 20, Score: 0.00000\n", + "Feature: 21, Score: 0.00000\n", + "Feature: 22, Score: 0.00000\n", + "Feature: 23, Score: 0.00000\n", + "Feature: 24, Score: 0.00000\n", + "Feature: 25, Score: 0.00000\n", + "Feature: 26, Score: 0.00000\n", + "Feature: 27, Score: 0.00000\n", + "Feature: 28, Score: 0.00000\n", + "Feature: 29, Score: 0.00000\n" + ] + } + ], + "source": [ + "# feature importance\n", + "model = rf\n", + "importance = model.feature_importances_\n", + "# summarize feature importance\n", + "for i,v in enumerate(importance):\n", + "\tprint('Feature: %0d, Score: %.5f' % (i,v))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SCMaUgSUvKlX" + }, + "source": [ + "# RandomForestAlgorithm Urinating" + ] + }, + { + "cell_type": "code", + "source": [ + "BEHAVIOUR_1= 'rumination_video'\n", + "BEHAVIOUR_2= 'eating_video'\n", + "BEHAVIOUR_3= 'drinking_video'\n", + "BEHAVIOUR_4= 'sleeping_video'\n", + "BEHAVIOUR_5= 'urinating_video'\n", + "BEHAVIOUR_6= 'mooing_video'\n", + "BEHAVIOUR_7= 'resting_video'" + ], + "metadata": { + "id": "tflHZ5fbvKlX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mNYoZP1JvKlY" + }, + "outputs": [], + "source": [ + "BEHAVIOUR='urinating'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nlQ3ksqVvKlY" + }, + "outputs": [], + "source": [ + "y_final = np.stack(signal_features_1[:,4], axis =0) # BEHAVIOUR_5= 'urinating_video'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "URQyVflE2GbI" + }, + "outputs": [], + "source": [ + "bool_minority_labels = y_final != 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VAI35WN82GbK" + }, + "outputs": [], + "source": [ + "min_features = X_final[bool_minority_labels]\n", + "maj_features = X_final[~bool_minority_labels]\n", + "\n", + "min_labels = y_final[bool_minority_labels]\n", + "maj_labels = y_final[~bool_minority_labels]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MVy_-x6W2GbL" + }, + "source": [ + "Split the minority class in 70 30 to have more in train set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V3oyvRyL2GbL" + }, + "outputs": [], + "source": [ + "# Split minority dataset into training set and test set\n", + "X_min_train, X_min_test, y_min_train, y_min_test = train_test_split(min_features, min_labels, test_size=0.3, random_state=1) # 70% training and 30% test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "goqZyjnl2GbM" + }, + "source": [ + "Split the majority class in 50 50 so we can sample from independant sets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tsz2Q9cX2GbM" + }, + "outputs": [], + "source": [ + "# Split minority dataset into training set and test set\n", + "X_maj_train, X_maj_test, y_maj_train, y_maj_test = train_test_split(maj_features, maj_labels, test_size=0.5, random_state=1) # 50% training and 50% test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uoE1Sm6D2GbM" + }, + "source": [ + "## Create train set" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LMOZu9Gd2GbN" + }, + "source": [ + "Upsample the amount of minority features and labels 3 times (why 3, no idea, feels that this will create enough labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KVXBHLmb2GbN" + }, + "outputs": [], + "source": [ + "upsampled_X_min_train = np.concatenate([X_min_train, X_min_train, X_min_train], axis=0)\n", + "upsampled_y_min_train = np.concatenate([y_min_train, y_min_train, y_min_train], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mDuG8HSl2GbN" + }, + "source": [ + "Downsample from 50% of the majority class to have same amount of samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "32f1c0d5-2d34-4634-fcfb-b284bbda9982", + "id": "x4YtP7Am2GbN" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(2322, 30)" + ] + }, + "metadata": {}, + "execution_count": 69 + } + ], + "source": [ + "ids = np.arange(len(X_maj_train))\n", + "choices = np.random.choice(ids, len(upsampled_X_min_train))\n", + "\n", + "res_maj_train_features = X_maj_train[choices]\n", + "res_maj_train_labels = y_maj_train[choices]\n", + "\n", + "res_maj_train_features.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nrKRCDIG2GbO" + }, + "source": [ + "Concat together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uRtKN0jQ2GbO" + }, + "outputs": [], + "source": [ + "resampled_train_features = np.concatenate([upsampled_X_min_train, res_maj_train_features], axis=0)\n", + "resampled_train_labels = np.concatenate([upsampled_y_min_train, res_maj_train_labels], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2lEtyoxZ2GbO" + }, + "source": [ + "Shuffle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hqpuUUN-2GbP" + }, + "outputs": [], + "source": [ + "order = np.arange(len(resampled_train_labels))\n", + "np.random.shuffle(order)\n", + "resampled_train_features = resampled_train_features[order]\n", + "resampled_train_labels = resampled_train_labels[order]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pvVlPMJO2GbP" + }, + "source": [ + "## Create test set" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fEh4v9A-2GbP" + }, + "source": [ + "We start from the X_min_test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5CzpA55W2GbP" + }, + "source": [ + "And downsample the remaining part of the majority class which we didn't use so far" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4LogLMlC2GbQ" + }, + "outputs": [], + "source": [ + "ids = np.arange(len(X_maj_test))\n", + "choices = np.random.choice(ids, len(X_min_test))\n", + "\n", + "res_maj_test_features = X_maj_test[choices]\n", + "res_maj_test_labels = y_maj_test[choices]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_i24ByfF2GbQ" + }, + "source": [ + "Concat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aSdbMMCF2GbQ" + }, + "outputs": [], + "source": [ + "resampled_test_features = np.concatenate([X_min_test, res_maj_test_features], axis=0)\n", + "resampled_test_labels = np.concatenate([y_min_test, res_maj_test_labels], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K2MIdTOu2GbQ" + }, + "source": [ + "Shuffle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Iavr8fGG2GbQ" + }, + "outputs": [], + "source": [ + "order = np.arange(len(resampled_test_labels))\n", + "np.random.shuffle(order)\n", + "resampled_test_features = resampled_test_features[order]\n", + "resampled_test_labels = resampled_test_labels[order]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LFL3K6Zb2GbR" + }, + "outputs": [], + "source": [ + "X_train= resampled_train_features\n", + "y_train= resampled_train_labels\n", + "X_test= resampled_test_features\n", + "y_test= resampled_test_labels" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K3VPRg_4vKlY" + }, + "source": [ + "## Test settings RandomForestAlgorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ec607b26-2308-43ce-9851-ce0c6547a066", + "id": "oNyVrhn_vKlY" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "2\n", + "4\n", + "8\n", + "16\n", + "32\n", + "64\n", + "100\n", + "200\n" + ] + } + ], + "source": [ + "n_estimators = [1, 2, 4, 8, 16, 32, 64, 100, 200]\n", + "train_results = []\n", + "test_results = []\n", + "for estimator in n_estimators:\n", + " print(estimator)\n", + " rf = RandomForestClassifier(n_estimators=estimator, max_features=\"sqrt\")\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 280 + }, + "id": "V19IJ6tgvKlY", + "outputId": "73d3e7ef-5c5e-4d48-b106-260955d4901b" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(n_estimators, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(n_estimators, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"n_estimators\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b0pxjgsHvKlY" + }, + "source": [ + "### Test max depth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qRTzxBMUvKlY", + "outputId": "d79c4081-756c-4851-80b0-8e361d7a8926" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.0\n", + "2.0\n", + "3.0\n", + "4.0\n", + "5.0\n", + "6.0\n", + "7.0\n", + "8.0\n", + "9.0\n", + "10.0\n", + "11.0\n", + "12.0\n", + "13.0\n", + "14.0\n", + "15.0\n", + "16.0\n", + "17.0\n", + "18.0\n", + "19.0\n", + "20.0\n", + "21.0\n", + "22.0\n", + "23.0\n", + "24.0\n", + "25.0\n", + "26.0\n", + "27.0\n", + "28.0\n", + "29.0\n", + "30.0\n", + "31.0\n", + "32.0\n" + ] + } + ], + "source": [ + "max_depths = np.linspace(1, 32, 32, endpoint=True)\n", + "train_results = []\n", + "test_results = []\n", + "for max_depth in max_depths:\n", + " print(max_depth)\n", + " rf = RandomForestClassifier(max_depth=max_depth, max_features=\"sqrt\", n_estimators = 50)\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "id": "FYgFS1_TvKlY", + "outputId": "b248708b-bf1f-4063-d995-22b1347b62e7" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(max_depths, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(max_depths, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"Tree depth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x-dEOvK3vKlY" + }, + "source": [ + "## Selected RF\n", + "From the previous we select settings for our selected RF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DVB_q8aYvKlZ" + }, + "outputs": [], + "source": [ + "# Random Forest\n", + "rf = RandomForestClassifier(n_estimators=50, max_depth=15, max_features=\"sqrt\")\n", + "# Train Decision Tree Classifer\n", + "rf.fit(X_train,y_train)\n", + "\n", + "#Predict the response for test dataset\n", + "y_pred = rf.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uqdyWymOvKlZ" + }, + "source": [ + "## Confusion matrix and classification report" + ] + }, + { + "cell_type": "markdown", + "source": [ + "without resampling" + ], + "metadata": { + "id": "FhyVsSTXFK0h" + } + }, + { + "cell_type": "code", + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7p7P1j2fFM9c", + "outputId": "da5a9a90-aee8-4cd4-ce27-f23c9fcb150d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: urinating\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[128334 8]\n", + " [ 855 81]]\n", + "Accuracy Score : 0.9933244635591516\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 1.00 128342\n", + " 1 0.91 0.09 0.16 936\n", + "\n", + " accuracy 0.99 129278\n", + " macro avg 0.95 0.54 0.58 129278\n", + "weighted avg 0.99 0.99 0.99 129278\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "with resampling" + ], + "metadata": { + "id": "1eT7eoiuFON5" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b56cd904-7afc-422b-974b-22ed45aa0bd2", + "id": "T6kHN-4qvKlZ" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: urinating\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[330 3]\n", + " [ 10 323]]\n", + "Accuracy Score : 0.9804804804804805\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.97 0.99 0.98 333\n", + " 1 0.99 0.97 0.98 333\n", + "\n", + " accuracy 0.98 666\n", + " macro avg 0.98 0.98 0.98 666\n", + "weighted avg 0.98 0.98 0.98 666\n", + "\n" + ] + } + ], + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "md2_QB9OvKlZ" + }, + "source": [ + "##Feature importances" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "7bb77931-f809-42cf-d610-eda9e5beef46", + "id": "9XaiZrZnvKlZ" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Feature: 0, Score: 0.03025\n", + "Feature: 1, Score: 0.04197\n", + "Feature: 2, Score: 0.03687\n", + "Feature: 3, Score: 0.03050\n", + "Feature: 4, Score: 0.05664\n", + "Feature: 5, Score: 0.08549\n", + "Feature: 6, Score: 0.09199\n", + "Feature: 7, Score: 0.07974\n", + "Feature: 8, Score: 0.06275\n", + "Feature: 9, Score: 0.05362\n", + "Feature: 10, Score: 0.02258\n", + "Feature: 11, Score: 0.02659\n", + "Feature: 12, Score: 0.02583\n", + "Feature: 13, Score: 0.02239\n", + "Feature: 14, Score: 0.02241\n", + "Feature: 15, Score: 0.06408\n", + "Feature: 16, Score: 0.10550\n", + "Feature: 17, Score: 0.05459\n", + "Feature: 18, Score: 0.04518\n", + "Feature: 19, Score: 0.04102\n", + "Feature: 20, Score: 0.00000\n", + "Feature: 21, Score: 0.00000\n", + "Feature: 22, Score: 0.00000\n", + "Feature: 23, Score: 0.00000\n", + "Feature: 24, Score: 0.00000\n", + "Feature: 25, Score: 0.00000\n", + "Feature: 26, Score: 0.00000\n", + "Feature: 27, Score: 0.00000\n", + "Feature: 28, Score: 0.00000\n", + "Feature: 29, Score: 0.00000\n" + ] + } + ], + "source": [ + "# feature importance\n", + "model = rf\n", + "importance = model.feature_importances_\n", + "# summarize feature importance\n", + "for i,v in enumerate(importance):\n", + "\tprint('Feature: %0d, Score: %.5f' % (i,v))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UKfQ0sQNuyCu" + }, + "source": [ + "# RandomForestAlgorithm Drinking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I5ReXUBFwc_s" + }, + "outputs": [], + "source": [ + "BEHAVIOUR='drinking'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xd3VcSpRu54R" + }, + "outputs": [], + "source": [ + "y_final = np.stack(signal_features_1[:,2], axis =0) # BEHAVIOUR_3= 'drinking_video'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3l1yQk4x3bZf" + }, + "outputs": [], + "source": [ + "bool_minority_labels = y_final != 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wwlZmpB53bZf" + }, + "outputs": [], + "source": [ + "min_features = X_final[bool_minority_labels]\n", + "maj_features = X_final[~bool_minority_labels]\n", + "\n", + "min_labels = y_final[bool_minority_labels]\n", + "maj_labels = y_final[~bool_minority_labels]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iT8xfEZJ3bZf" + }, + "source": [ + "Split the minority class in 70 30 to have more in train set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vZkPmnWa3bZf" + }, + "outputs": [], + "source": [ + "# Split minority dataset into training set and test set\n", + "X_min_train, X_min_test, y_min_train, y_min_test = train_test_split(min_features, min_labels, test_size=0.3, random_state=1) # 70% training and 30% test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ah9x9ENT3bZf" + }, + "source": [ + "Split the majority class in 50 50 so we can sample from independant sets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qFZrwQ__3bZf" + }, + "outputs": [], + "source": [ + "# Split minority dataset into training set and test set\n", + "X_maj_train, X_maj_test, y_maj_train, y_maj_test = train_test_split(maj_features, maj_labels, test_size=0.5, random_state=1) # 50% training and 50% test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KSlpyP1q3bZf" + }, + "source": [ + "## Create train set" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O4owWQRk3bZf" + }, + "source": [ + "Upsample the amount of minority features and labels 3 times (why 3, no idea, feels that this will create enough labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EXf42TdN3bZf" + }, + "outputs": [], + "source": [ + "upsampled_X_min_train = np.concatenate([X_min_train, X_min_train, X_min_train], axis=0)\n", + "upsampled_y_min_train = np.concatenate([y_min_train, y_min_train, y_min_train], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iknpy8I13bZg" + }, + "source": [ + "Downsample from 50% of the majority class to have same amount of samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "66b8c1d5-1294-41bd-db76-47b886fca2d1", + "id": "1vdAC-yZ3bZg" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(9630, 30)" + ] + }, + "metadata": {}, + "execution_count": 89 + } + ], + "source": [ + "ids = np.arange(len(X_maj_train))\n", + "choices = np.random.choice(ids, len(upsampled_X_min_train))\n", + "\n", + "res_maj_train_features = X_maj_train[choices]\n", + "res_maj_train_labels = y_maj_train[choices]\n", + "\n", + "res_maj_train_features.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nkaHMdJ83bZg" + }, + "source": [ + "Concat together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pDd6j6Rk3bZg" + }, + "outputs": [], + "source": [ + "resampled_train_features = np.concatenate([upsampled_X_min_train, res_maj_train_features], axis=0)\n", + "resampled_train_labels = np.concatenate([upsampled_y_min_train, res_maj_train_labels], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P1mKCAC63bZg" + }, + "source": [ + "Shuffle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R2MgX8A_3bZg" + }, + "outputs": [], + "source": [ + "order = np.arange(len(resampled_train_labels))\n", + "np.random.shuffle(order)\n", + "resampled_train_features = resampled_train_features[order]\n", + "resampled_train_labels = resampled_train_labels[order]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C2IC3TFz3bZg" + }, + "source": [ + "## Create test set" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5O0lyU253bZg" + }, + "source": [ + "We start from the X_min_test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uvfmf-413bZg" + }, + "source": [ + "And downsample the remaining part of the majority class which we didn't use so far" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GGG_IsWr3bZg" + }, + "outputs": [], + "source": [ + "ids = np.arange(len(X_maj_test))\n", + "choices = np.random.choice(ids, len(X_min_test))\n", + "\n", + "res_maj_test_features = X_maj_test[choices]\n", + "res_maj_test_labels = y_maj_test[choices]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YP1DJjEc3bZh" + }, + "source": [ + "Concat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q49p8NE53bZh" + }, + "outputs": [], + "source": [ + "resampled_test_features = np.concatenate([X_min_test, res_maj_test_features], axis=0)\n", + "resampled_test_labels = np.concatenate([y_min_test, res_maj_test_labels], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ptpIZyYP3bZh" + }, + "source": [ + "Shuffle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B54HxSnM3bZh" + }, + "outputs": [], + "source": [ + "order = np.arange(len(resampled_test_labels))\n", + "np.random.shuffle(order)\n", + "resampled_test_features = resampled_test_features[order]\n", + "resampled_test_labels = resampled_test_labels[order]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T-QdKCWM3bZh" + }, + "outputs": [], + "source": [ + "X_train= resampled_train_features\n", + "y_train= resampled_train_labels\n", + "X_test= resampled_test_features\n", + "y_test= resampled_test_labels" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yP4nHQt0c-JK" + }, + "source": [ + "## Test settings RandomForestAlgorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "0c10d4de-d7bc-45a1-95b0-c48f297723c9", + "id": "GLY0ux3ac-JK" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "2\n", + "4\n", + "8\n", + "16\n", + "32\n", + "64\n", + "100\n", + "200\n" + ] + } + ], + "source": [ + "n_estimators = [1, 2, 4, 8, 16, 32, 64, 100, 200]\n", + "train_results = []\n", + "test_results = []\n", + "for estimator in n_estimators:\n", + " print(estimator)\n", + " rf = RandomForestClassifier(n_estimators=estimator, max_features=\"sqrt\")\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 280 + }, + "outputId": "70e2de90-e69b-4353-8a0c-498fb1fa5e22", + "id": "a907J5sQc-JK" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(n_estimators, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(n_estimators, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"n_estimators\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4eJWv-_Sc-JK" + }, + "source": [ + "### Test max depth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "66981005-158b-4663-b0de-bb5a58aeb130", + "id": "JeGF1xm9c-JK" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.0\n", + "2.0\n", + "3.0\n", + "4.0\n", + "5.0\n", + "6.0\n", + "7.0\n", + "8.0\n", + "9.0\n", + "10.0\n", + "11.0\n", + "12.0\n", + "13.0\n", + "14.0\n", + "15.0\n", + "16.0\n", + "17.0\n", + "18.0\n", + "19.0\n", + "20.0\n", + "21.0\n", + "22.0\n", + "23.0\n", + "24.0\n", + "25.0\n", + "26.0\n", + "27.0\n", + "28.0\n", + "29.0\n", + "30.0\n", + "31.0\n", + "32.0\n" + ] + } + ], + "source": [ + "max_depths = np.linspace(1, 32, 32, endpoint=True)\n", + "train_results = []\n", + "test_results = []\n", + "for max_depth in max_depths:\n", + " print(max_depth)\n", + " rf = RandomForestClassifier(max_depth=max_depth, max_features=\"sqrt\", n_estimators = 50)\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "outputId": "a2ac141d-d763-4cc5-a0ba-0dbe5157c93f", + "id": "Op91IzY9c-JK" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3dd3hUZfbA8e8hhLIUgYCIRCAiiigIS6xYcF2VFfmBomtABVZ3WTuoqLCoIDYsWEAFUUGxhCKi2EBEEBULiVgoCghBQgmR0EIPOb8/3hsyhslkkkxJOZ/nuc/M3DJzLqNz8nZRVYwxxpiCqkQ7AGOMMWWTJQhjjDF+WYIwxhjjlyUIY4wxflmCMMYY41fVaAcQKg0bNtQWLVpEOwxjjClXUlNT/1DVRv6OVZgE0aJFC1JSUqIdhjHGlCsisrawY1bFZIwxxi9LEMYYY/yyBGGMMcavCtMG4c+BAwdIT09n79690Q6lQqhRowbx8fHExsZGOxRjTARU6ASRnp5OnTp1aNGiBSIS7XDKNVVly5YtpKenk5CQEO1wjDERELYqJhGZICKbRWRJIcdFREaLyCoR+UlE/upzrK+IrPS2viWNYe/evcTFxVlyCAERIS4uzkpjxlQi4WyDeBXoEuD4P4BW3tYfGAsgIg2AYcDpwGnAMBGpX9IgLDmEjv1bGlO5hK2KSVUXiEiLAKd0Byapm2/8GxGpJyJNgM7AHFXNAhCRObhEkxyuWI0x4aUK+/fD3r1u27Mn/3netm8fVKlS9CYCBw+6LTc38POijh886GLL2/JiLex13pabG/i573X+3jPU4uOhf//Qv2802yCaAut8Xqd7+wrbfxgR6Y8rfdCsWbPwRFkKW7Zs4YILLgBg06ZNxMTE0KiRG7D43XffUa1atUKvTUlJYdKkSYwePbpYn/nDDz/QoUMHPv74Y7p0cQW4tLQ0Lr30UpYsya/tGz58OLVr12bQoEEAPPnkk7z88svUqFGD2NhYbr31Vvr06VOszzblw969kJUFW7cW/rh7N+TkFL0dOOB++H0f/e3bty/ad112hKMgfvrpFS9BlJqqjgfGAyQmJpa5lY/i4uL44YcfgMN/kAFycnKoWtX/V5CYmEhiYmKxPzM5OZmzzz6b5OTkQwmiKOPGjWPOnDl899131K1blx07djBjxoxif7aJHlXYsQM2bPC/rV/vHjMyXIIojAjUqwe1akFsLFStWvgWEwM1a7rzYmOhWrU/P/o+r1Gj6C3v76Xc3KK3mJj8rUqVwM+DOZ5373k/3nnP/b3OK8UEel7wurzn5U00E8R64Bif1/HevvW4aibf/fMjFlWY9evXjxo1arB48WI6depEUlISAwYMYO/evdSsWZOJEydywgknMH/+fJ588kk++OADhg8fzu+//87q1av5/fffGThwILfddtth762qTJs2jTlz5nDOOeewd+9eatSoUWRMjzzyCPPnz6du3boA1K1bl759S9w3wJSCKmzeDGvXui09HbZvh50787fs7MOfb9vmqm0KqlsXjj7abeecA40bQ1wc1K8PDRoc/li3bv4PpjHRTBAzgVtEZDKuQXq7qm4UkdnAIz4N0xcBQ0L5wQMHgveHfbG1bw/PPFO6z09PT2fhwoXExMSwY8cOvvjiC6pWrcqnn37K//73P6ZPn37YNb/88gvz5s1j586dnHDCCdx4442HjUdYuHAhCQkJtGzZks6dO/Phhx/Ss2fPgLHs2LGDnTt3cuyxx5bupkzQdu1y//399lt+Isjbfv/df3VMrVpQpw7Uru0e69SBJk3yX/smgrwt77gxJRW2BCEiybiSQEMRScf1TIoFUNVxwEfAJcAqYDfwL+9Ylog8CCzy3mpEXoN1RXHllVcSExMDwPbt2+nbty8rV65ERDhw4IDfa7p27Ur16tWpXr06Rx55JBkZGcTHx//pnOTkZJKSkgBISkpi0qRJ9OzZs9DeR9YrKfz27oUff4RFiyAlxW3Ll7tqkjxHHQXNmrk/Prp3h+bN3evmzeGYY+CII1xViDGRFs5eTL2KOK7AzYUcmwBMCEdcUPoSQGnVqlXr0PP77ruP888/nxkzZpCWlkbnzp39XlO9evVDz2NiYsjJyfnT8YMHDzJ9+nTee+89Hn744UMD23bu3ElcXBxbt2790/lZWVkkJCRQt25dateuzerVq60UUQqqroF35UqXEPKSwZIlrjEX4Mgj4dRT4YorIDERTjjBJYAgagFNtOXk5HejqkTKdSN1RbB9+3aaNnWdtF599dUSv8/cuXNp164ds2fPPrSvb9++zJgxgz59+tCkSRM+++wz/va3v5GVlcWsWbMYMGAAAEOGDOHmm29mypQp1K1bl+zsbN555x3rxVSAKmRmwqpVblu5Mv/5qlWuHSBP/fouCdx9t3tMTHRdEa3QFgK5ubBli2txz8iATZv+/Jj3vEoVaNsWTjklf2vYsOj337cPfv7ZZfjU1PxMn5vrGmoaNHANOXFx+c/97cvb/vKXor/4wu5p69b8rmEHDuR3HSv4/Ljj4PHHQ/Pv68MSRJTdfffd9O3bl4ceeoiuXbuW+H2Sk5O57LLL/rSvZ8+ejB07lj59+jBp0iRuvvlm7rjjDgCGDRtGy5YtAbjxxhvJzs7m1FNPJTY2ltjYWO68886S31QF8scf8O678PbbsHChaxDOU6UKtGjh/t/s3ds9HncctGkDxx5bSZJBXtEpPd1t69blP8/bNm50P5oJCX/eWrRwj0cd9ee/zHNz3TVpabBmTf6W9zo9Pb9Y5qtaNfdejRu7otn+/fDJJzBpUv45Rx8N7dr9OWns2ZOfCFJTXXLIq+qNi4OOHWHQINd1a8sWd79btriuYT/95F5nZxf+b1S9+uEJpFYt9x9XXjLYvNkNyihIJL9LWF63Mn/PfWolQkk0XCM3IiwxMVELLhi0fPlyTjzxxChFVDFVhn/TzZthxgyXFObNc//fHnssdOniqoXyEkGLFvldMyucXbsO/2u24POMDPcjWbDfbEyM+yGOj3fbUUe5H9S8H/pNm/58fvXq7h/zqKNcYli79vCW+iZN8hNK8+bu3Lxk0Lixe37EEf6z8ubN7of8xx/zt+XL85NAnvr1XTJITMx/bN48uEy/b19+4sh7LPjc93V2tivN5N1DwXsp6p5CSERSVdVvn3orQRiD+8165x2XFD7/3P0R26oV3HOPazNo376ClQh27y78L/Q1a/5cX+YrLi7/B+y006Bp0/xEkLc1buz+ui3Mnj3uswp+/qZN7q/77t3/XMJo3twNuCipI4+Ev//dbXn274dffnGJo1o1lwwSEkr+JVev7pJYkyYlj7MMsgRhKq3sbJgyxdVAfPGFqy1p3RqGDnVJoW3bCpIUsrNd1vvkE/juO/djnJHx53Nq1Mj/QT7zTFdFU/Cv2kaNXHVGadWsCSee6LZoqVbNJaN27aIXQzlgCcJUOt9/D+PHw1tvuTaF1q3h/vvhyitd+0G5Twq5uW6gxezZLil89ZWrTqlZ083J0K1bfjLI2xo3rgA3bkLNEoSpFHbuhORklxhSU90fzFdd5eavOfPMMvzbuG+f/8bLgrKyYO5clxTmzHENoOAaYW+/HS66CDp1sj61plgsQZgKS9V1TBk/3iWHXbtctdGYMXD11a5NskzYt88Nq1650m0rVuQ/bthQvPdq3Bj+8Q+XEP7+d1c1ZEwJWYIwFdKKFdCrl6tO+stfICnJlRZOO60MlBbWrYMXXnBFmZUr3fwavkOrGzZ0LeQXXui6TwXzV3/NmnDuuS4DVrLBXCZ8LEGEUWmm+waYP38+1apV46yzzir0nB49erBp0ya++eabQ/v69evHpZdeyhVXXHFoX+3atcn2+mqvWLGCgQMHsnLlSurUqcNxxx3HmDFjaNy4cYnvtSzZtMl1Sd250/0O9+7tegtG3YoV8Nhj8PrrrnjTvr2r3+rTB44/3iWFVq3KUNHGVHaWIMKoqOm+izJ//nxq165daILYtm0bqampxZoqY+/evXTt2pWnnnqKbt26HfqczMzMCpEgdu6Erl1dJ535893UFlG3eDE8+qjrQ1u9Ovz3v27gVfPm0Y7MmICsLBphqampnHfeeXTs2JGLL76YjRs3AjB69GjatGlDu3btSEpKIi0tjXHjxvH000/Tvn17vvjii8Pe65133qFbt24kJSUxefLkoD7/rbfe4swzzzyUHAA6d+7MySefHJobjKIDB1xPpB9/hKlTQ5wcdu921UHFWZP7iy/gkkvgr391jceDB7u+/2PGWHIw5ULlKUGUZo7vwhRz7m9V5dZbb+W9996jUaNGTJkyhaFDhzJhwgRGjhzJmjVrqF69Otu2baNevXrccMMNAUsdycnJ3H///TRu3JiePXvyv//9r8gYlixZQseOHYOOubxQdW0Ms2fDSy+5UkTI3vidd+C22/IbjJs0OXy6iLwtPh4+/RQeeQS+/NKNHXjkEbjppjJSz2VM8CpPgigD9u3bx5IlS7jwwgsBNwNrE2/kZbt27bj66qvp0aMHPXr0KPK9MjIyWLlyJWeffTYiQmxsLEuWLOHkk0/2O413RZ/ae9gwePVV9/jvf4foTdPS4JZb4MMPXXfR4cNdA0feyN8vv3Tdo3wbmEVcUjnmGBg9Gq6/3rWSG1MOVZ4EEe05vnEliJNOOomvv/76sGMffvghCxYs4P333+fhhx/m559/DvheU6dOZevWrSQkJABu4Z/k5GQefvjhw6b3zsrKoqE3i+VJJ53E559/HsK7ir7x4+HBB91v8bBhIXjDAwfcfy/Dh7vXTz4JAwb4nz7iwAE3eZzvdBWtWrluUxV2oiZTWVgbRARVr16dzMzMQwniwIEDLF26lNzcXNatW8f555/PY489xvbt28nOzqZOnTrs9J0+1EdycjKzZs0iLS2NtLQ0UlNTD7VDdO7cmSlTprB//37ATSN+/vnnA9C7d28WLlzIhx9+eOi9FixYwJIlS8J562Hz/vtw442u6//YsSHowvr1126itrvvduMIli+HO+8sfG6h2FhXtfS3v7kM9eCDrleSJQdTAYQ1QYhIFxH5VURWichgP8ebi8hcEflJROaLSLzPsYMi8oO3zQxnnJFSpUoV3n77be655x5OOeUU2rdvz8KFCzl48CDXXHMNbdu2pUOHDtx2223Uq1ePbt26MWPGjMMaqdPS0li7di1nnHHGoX0JCQkcccQRfPvtt1x66aWcc845dOzYkfbt2/PVV1/x2GOPAVCzZk0++OADxowZQ6tWrWjTpg0vvPDCoe635cm337rR0B06uEbpUk0TtG2byzSdOrk5+GfMgPfec0u7GVNJhW26bxGJAVYAFwLpuCVEe6nqMp9zpgEfqOprIvI34F+qeq13LFtVg15R16b7joyy8m+6ciWcdZZbi3nhQjeAuERUYfJkNx1FZqarSnrgAbfQszGVQLSm+z4NWKWqq70gJgPdgWU+57QB7vCezwPeDWM8poLIyHAD4QBmzSphcli2zDUwT57sloM79VT4+GNXHDHGAOFNEE2BdT6v04HTC5zzI3A58CxwGVBHROJUdQtQQ0RSgBxgpKoeljxEpD/QH6CZVQVUaFlZbmaK1FR48023rsy8ea49OGhpaS4hJCe7dQCqVIHzz4f77nOTM8XEhCt8Y8qlaPdiGgQ8JyL9gAXAeiBv6srmqrpeRI4FPhORn1X1N9+LVXU8MB5cFZO/D1DVCt/FM1Iitfrgli35ySBvS0vLP37ssW5Q8ukF/9zwJyPDNVAkJ7sGaIAzzoBnn4V//tMmszMmgHAmiPXAMT6v4719h6jqBlwJAhGpDfRU1W3esfXe42oRmQ90AP6UIIpSo0YNtmzZQlxcnCWJUlJVtmzZQo0wTRetCnfdBdOmubnr8rRs6SbYu/FG17moQwe3pG+R1q93vYrmzHHjFNq2dQPWkpJcryNjTJHCmSAWAa1EJAGXGJKA3r4niEhDIEtVc4EhwARvf31gt6ru887pBDxe3ADi4+NJT08nMzOzdHdiAJdw4+Pjiz6xBJ5+GkaNgksvdWPTOnZ0M1TUq1eCN8vJcYlg8WIYMsRN63rSSSGP2ZiKLmwJQlVzROQWYDYQA0xQ1aUiMgJIUdWZQGfgURFRXBXTzd7lJwIvikgurivuSN/eT8GKjY09NJDMlF3ffefWfr78cld1VOrC3v33u1HOb7zh2haMMSUStm6ukeavm6sp+7Zty+84tHhxCUsMvmbNcqPm/v1vNymTMSagaHVzNSYgVbjuOjdTxZdfhiA5rF8P117r2htGjw5JjMZUZpYgTNQ895wbsDxqVJA9kgLJyXFtDXv2uF5LNWuGJEZjKjNLECYqUlPdmjndurlBzKU2bJhbf+GNN6B16xC8oTHGJuszEbd9uxuC0Lixm6K71I3Ss2e7LqzXX2+N0saEkJUgTESpwn/+A2vXwoIFQY5pCGT9erjmGjj5ZGt3MCbELEGYiBo3zg2GGznSTbZXKjk50Lu3a3eYNs0W5jEmxCxBmIj54QfX3vCPf7hR06U2fLgrhrz+urU7GBMG1gZhImLnTtfuEBcHr73m5skrlU8+ce0O113nqpiMMSFnJQgTdqpwww3w229uBtZSr020YYNLCm3awJgxIYnRGHM4K0GYsJswAd56y63Dc+65pXyzVatcUWTXLmt3MCbMLEGYsFq7FgYOdEs2DxlSwjdRdUWP7t3h+OPd5E0vvwxlYGU7YyoySxAmbFShf3/3+MorJViPZ+9eN1CiQweXYRYuhKFDXdbp1SscIRtjfFgbhAmb115zbcnPPQctWhTjwowMGDvWbZs3uzEOL7/surTaFBrGRIwlCBMWGze6Lq3nnOMW+wnKsmXwxBOuwWL/fuja1dVPXXBBCIZbG2OKyxKECTlVuOkmV0P08stBdmn99ltXjQRuqu7bboMTTghrnMaYwCxBmJCbNg3efRcef9y1KRdpxQpXWjjqKDfvd5MmYY/RGFO0sDZSi0gXEflVRFaJyGA/x5uLyFwR+UlE5otIvM+xviKy0tv6hjNOEzp//OGWDE1MDHKW1k2b4OKLXTFj9mxLDsaUIWErQYhIDPA8cCGQDiwSkZkFlg59Epikqq+JyN+AR4FrRaQBMAxIBBRI9a7dGq54TWgMGOBWiZs7F6oW9V/Xjh1u3o3MTNeN9bjjIhKjMSY44SxBnAasUtXVqrofmAx0L3BOG+Az7/k8n+MXA3NUNctLCnOALmGM1YTA+++79uWhQ92ibgHt3+8WoV6yxC1EfeqpEYnRGBO8cCaIpsA6n9fp3j5fPwKXe88vA+qISFyQ1yIi/UUkRURSMjMzQxa4Kb5t29x0Gm3bBjEgLjcX+vVzxYxXXoEulvuNKYuiPVBuEHCeiCwGzgPWAweDvVhVx6tqoqomNir1BD+mNO66yzUnvPIKVKsWxMnJyfDoo9CnT0TiM8YUXzh7Ma0HjvF5He/tO0RVN+CVIESkNtBTVbeJyHqgc4Fr54cxVlMKn37qurPefXcQNUWjRsFTT8Gtt8I990QkPmNMyYSzBLEIaCUiCSJSDUgCZvqeICINRSQvhiHABO/5bOAiEakvIvWBi7x9pozJznYrxLVq5ZZnCOitt9xC1FdcAU8/bYPfjCnjwlaCUNUcEbkF98MeA0xQ1aUiMgJIUdWZuFLCoyKiwALgZu/aLBF5EJdkAEaoala4YjUlN3QopKW5dXsCzoLx6aeu3eG889wCP8WemMkYE2miqtGOISQSExM1JSUl2mFUKgsXwtlnw803F7Esw+LFbp7vhASXSerVi1iMxpjARCRVVRP9HYt2I7Upp1TdQLimTV1bc6F27YKePaF+ffj4Y0sOxpQjNtWGKZH333fLMrz0EtSuHeDEe++FNWvg889dNjHGlBtWgjDFlpsL990HLVtC30CToHzzDTz7rJvOtdRLyRljIs1KEKbY3n4bfvoJ3ngDYmMLOWnfPrjuOoiPh5EjIxqfMSY0LEGYYsnJgfvvhzZtICkpwIkPPQTLl8NHH0HduhGLzxgTOpYgTLG8+Sb8+itMnx6gp+qPP7pSw7XXusn4jDHlknVzNUHbv9+t4dOgAaSkFDLOLScHzjgD1q1zK8TFxUU8TmNM8AJ1c7UShAnahAluUNwLLwQYBD1qFKSmulWDLDkYU65ZCcIEZc8et1xDixZu0Te/CWLFCmjXDi65xNVB2VQaxpR5VoIwpTZuHGzY4Nog/P7u5+bC9de7+Taef96SgzEVgCUIU6TsbDda+oILoHPnQk4aO9YVLSZOtGVDjakgbKCcKdLo0W5V0IceKuSEtWth8GC46KIiRs4ZY8oTSxAmoG3b4Ikn4NJLXeekw6jCf//rHl980aqWjKlArIrJBDRqlEsSDz5YyAmTJsHs2W461xYtIhmaMSbMrARhCpWZCc88A1deCe3b+zlh0yY3pWunTnDTTRGPzxgTXmFNECLSRUR+FZFVIjLYz/FmIjJPRBaLyE8icom3v4WI7BGRH7xtXDjjNP499hjs3g0PPFDICXff7U545RWoYn9rGFPRhK2KSURigOeBC4F0YJGIzFTVZT6n3QtMVdWxItIG+Aho4R37TVX9/d1qImDDBtdb9Zpr4MQT/Zzw/fduZbjBg93wamNMhRPOP/tOA1ap6mpV3Q9MBroXOEeBvJncjgA2hDEeUwwPP+xmzRg2zM9BVbe2dMOGLkEYYyqkcCaIpsA6n9fp3j5fw4FrRCQdV3q41edYglf19LmInOPvA0Skv4ikiEhKZmZmCEOv3NascQsBXX89HHusnxM++gjmzXPZ44gjIh6fMSYygkoQInK2iPzLe95IRBJC9Pm9gFdVNR64BHhdRKoAG4FmqtoBuAN4S0QOmzNaVceraqKqJjZq1ChEIVVuqtC/P1Sr5haDO0xOjmt7aNXKdW81xlRYRbZBiMgwIBE4AZgIxAJvAJ2KuHQ9cIzP63hvn6/rgS4Aqvq1iNQAGqrqZmCftz9VRH4DjgdssqUwGzsWPv3UPcbH+zlhwgQ3S+s77wRYLcgYUxEEU4K4DPg/YBeAqm4A6gRx3SKglYgkiEg1IAmYWeCc34ELAETkRKAGkOmVUmK8/ccCrYDVQXymKYVVq+Cuu9yAaL+Fg+xst1pQp07Qo0fE4zPGRFYwvZj2q6qKiAKISK1g3lhVc0TkFmA2EANMUNWlIjICSFHVmcCdwEsicjuuwbqf91nnAiNE5ACQC9ygqlnFvz0TrIMH3SwZsbGu16rfAdFPPAEZGfDeezZi2phKIJgEMVVEXgTqich/gOuAl4J5c1X9CNf47Lvvfp/ny/BTVaWq04HpwXyGCY1Ro2DhQtdz1W/V0oYN8OST8M9/wumnRzw+Y0zkBUwQIiLAFKA1sAPXDnG/qs6JQGwmQpYsgfvug8svh6uvLuSkYcPgwAE3rasxplIImCC86p6PVLUtYEmhAtq/H/r0cb1Vx40rpOZoyRLXOD1gQCH9Xo0xFVEwjdTfi8ipYY/ERMXDD8PixTB+PBTaU/juu6Fu3UL6vRpjKqpg2iBOB64WkbW4nkyCK1y0C2tkJuwWLXIJ4tprA3RK+vRT+Phj1/7QoEFE4zPGRFeRa1KLSHN/+1V1bVgiKiFbk7p49uyBjh1hxw5Xg1Svnp+TcnPdSdu2wS+/QPXqEY/TGBNepVqTWlXXisgpQN50F1+o6o+hDNBE3r33wvLlbikHv8kB4I034Icf4K23LDkYUwkV2QYhIgOAN4Ejve0NEbk18FWmLFuwAJ5+Gm680Q2K82vPHhg6FE49Fa66KqLxGWPKhmDaIK4HTlfVXQAi8hjwNTAmnIGZ8Ni5E/r1g4QEePzxACc+8wykp8Obb9paD8ZUUsEkCAEO+rw+6O0z5dCgQZCW5koRtWsXctLmzW68Q/fucO65kQzPGFOGBJMgJgLfisgM73UP4JXwhWTCZe5c15110CA4++xCTlKF//wH9u6FkSMjGp8xpmwJppH6KRGZD+T9pPxLVReHNSoTcnv3ujaHli1hxIgAJ44ZAzNnukaK1q0jFp8xpuwJZrrvM4Clqvq997quiJyuqt+GPToTMo8+CitXwiefQM2ahZyUmuqKF926uVHTxphKLZjWx7FAts/rbG+fKSd++cUliKuvhgsvLOSkHTtcb6XGjWHiRJut1RgTXCO1+oymU9VcEQnmOlMGqLq1HWrXhqeeCnDSDTe4tUbnz4e4uEiGaIwpo4IpQawWkdtEJNbbBmCL95Qbr77qeiw99hgceWQhJ02YAMnJ8MADcI7f5b+NMZVQMFNtHAmMBv6GW9RnLjDQWxa0zLCpNg73xx+unbl1a5ck/A5nWLrUDYY76yw3rDomJuJxGmOiJ9BUG0WWIFR1s6omqeqRqtpYVXsHmxxEpIuI/Coiq0RksJ/jzURknogsFpGfROQSn2NDvOt+FZGLg/k882eDBsH27fDii4Ukh927XbtDnTpuWg1LDsYYH8FMtfG413MpVkTmikimiFwTxHUxwPPAP4A2QC8RaVPgtHuBqaraAbdm9QvetW281ycBXYAX8taoNsGZNw9ee82tMX3SSYWcNHCgK0G8/jocdVRE4zPGlH3BtEFcpKo7gEuBNOA44K4grjsNWKWqq1V1PzAZ6F7gHAXqes+PADZ4z7sDk1V1n6quAVZ572eCsG+fa3M+9tgASzhMngwvvQSDBweYkMkYU5kF0xsp75yuwDRV3S7BdYFsCqzzeZ2OW1vC13DgE2/yv1rA332u/abAtU0LfoCI9Af6AzRr1iyYmCqFkSNhxQqYNQv+8hc/J6xaBf37u3aHgKPmjDGVWTAliA9E5BegIzBXRBoBe0P0+b2AV1U1HrgEeF1Egp4ZTlXHq2qiqiY2KnQ5tMrl11/hkUegVy+42F/Lzb59kJQEVau6nkuxsRGP0RhTPgQz1cZgEXkc2K6qB0VkN4dXFfmzHjjG53W8t8/X9bg2BlT1axGpATQM8lpTgKqbTqNmzQBjHgYPdiOmZ8wAK3UZYwII6q91Vc1S1YPe812quimIyxYBrUQkQUSq4RqdZxY453fgAgARORGoAWR65yWJSHURSQBaAd8FE2tl9vrrrnH6sccKaXMeMw6vulgAABc/SURBVMZN433rrQHWGDXGGCdsI6JVNUdEbgFmAzHABFVdKiIjgBRVnQncCbwkIrfjGqz7eaO2l4rIVGAZkAPcnJegjH9//AF33AFnnukmYz3Mk0+6Lk09erjnxhhThCIHypUXlX2g3HXXuRLE999D27YFDj70ENx3nxvz8Prr1u5gjDmkRAPlRORiEbnCz/4rRKSwKd9MFHzyiZtf7847CyQHVdfP9b77oE8ftzqcJQdjTJACtUHcD3zuZ/98wPpGlhHbt8O//+2m0xg2zOeAKtx9Nzz8sDth4kQbKW2MKZZAbRDVVTWz4E5V/UNEaoUxJlMMgwbB+vWwcKHPOg+5uW49h+eeg1tugWeftXWljTHFFuhXo66/ab1FJBYobMkZE0GzZsHLL7u259PzhiDm5rr5vZ97ztU5jR5tycEYUyKBfjnewfUwOlRaEJHawDjvmImibdtczVGbNjB8uLczJwf69XNZ49574YknbOEfY0yJBUoQ9wIZwFoRSRWR74E1uHEKhc3wYyLkjjtg0ya33kONGsCBA3DNNa6X0oMPus2SgzGmFAptg1DVHGCwiDyAm6AP3OR7eyISmSnURx+5NuchQ9xSDuTmurk1pk93pYZBg6IdojGmAig0QYjI5QV2KVBPRH5Q1Z3hDcsUZutWNxDupJN8ei1NmOCSw+OPW3IwxoRMoF5M3fzsawC0E5HrVfWzMMVkArj9dsjIgPfeg+rVcfVMd90F551nycEYE1KBqpj+5W+/iDQHpnL41N0mzN5/3y0CNHQoJOaNexwwAPbsgfHjrc3BGBNSxe7/qKprARuOG2FZWa73atu2bmA0AB98AFOnuh5Lxx8f1fiMMRVPsSfrE5ETgH1hiMUEMGAAbN7sckL16sDOnW5u75NPdiOmjTEmxAI1Ur+Pa5j21QBoAlwbzqDMn733HrzxBtx/P/z1r97OoUPdEOpp06BatajGZ4ypmAKVIArOCa3AFmClt8a0iYAtW1zV0imnuJwAwDffuJHSN98MZ5wR1fiMMRVXoEZqfxP1ISJni0gvVb05fGGZPHfe6ZLErFleQeHAAdfPtWlTt7aoMcaESVBtECLSAegNXIkbTW1TbUTAmjVuYPTtt0P79t7OJ56AJUtcvVOdOlGNzxhTsQVqgzge6OVtfwBTcAsMnR/sm4tIF+BZ3IpyL6vqyALHnwby3u8vwJGqWs87dhD42Tv2u6r+X7CfW1HkzbM3cKC3Y8UKGDECrrgC/q/S/XMYYyIsUAniF+AL4FJVXQXgLQ0aFBGJAZ4HLgTSgUUiMlNVl+Wdo6q3+5x/K9DB5y32qGp7Kqnt292ce0lJEB+PW9/hv/91Ey+NHh3t8IwxlUCgcRCXAxuBeSLykohcABRnJNZpuLmbVnuN2pOB7gHO7wUkF+P9K7SXX4bsbFe9BLjpNObPd1VMTZpEMzRjTCVRaIJQ1XdVNQloDcwDBgJHishYEbkoiPduCqzzeZ3u7TuMNzo7AfCdvqOGiKSIyDci0qOQ6/p756RkZh62tlG5lZPj1vjp3Nnr1pqR4abROOccuP76aIdnjKkkihxJraq7VPUtVe0GxAOLgXtCHEcS8LaqHvTZ19xbSLs38IyItPQT23hVTVTVxEaNGoU4pOiZPh3WrXNTegNulNzu3W46DVv8xxgTIcX6tVHVrd6P8gVBnL4eOMbndby3z58kClQvqep673E1bh3sDodfVvGowqhR0KoVdO0KfPghTJniptNo3Tra4RljKpFw/jm6CGglIgkiUg2XBGYWPElEWgP1ga999tUXkere84ZAJ2BZwWsrooULYdEi1/ZQhVxXjGjTBu4JdaHNGGMCK/ZcTMFS1RwRuQWYjevmOkFVl4rICCBFVfOSRRIwWVV9p/U4EXhRRHJxSWykb++niuypp6BBA+jTB5g713VtfeMNm07DGBNx8uff5fIrMTFRU1JSoh1Gqfz2m6taGjIEHn4YuOwy+PJLSE/3ZugzxpjQEpFUr733MNbiWYY8+yxUreqmWCI9HWbOdL2WLDkYY6LAEkQZsXWrG+rQuzccfTTw0kv5g+OMMSYKLEGUES+9BLt2eQPjDhxwO7p0gYSEaIdmjKmkLEGUAQcOuNkzLrjATevNzJmwcaNbEMgYY6IkbL2YTPCmTXNr/4wf7+144QVo1gwuuSSqcRljKjcrQURZ3sC41q1djRK//gqffebaHmJioh2eMaYSsxJElH3xBXz/Pbz4ojeLxrhxEBtrcy4ZY6LOShBR9tRTEBcH116Lm2/p1Vfh8suhceNoh2aMqeQsQUTRypWuPfqmm6BmTWDyZNi2ze0wxpgoswQRRc8842qTDuWDsWPhpJPctN7GGBNlliCiJCsLJk6Ea66Bo44CUlLcdsMNIMVZl8kYY8LDEkSUPPcc7Nnjs2Lc2LFQq5bXGGGMMdFnCSIKUlPhoYegZ084+WTcPBvJyXD11XDEEdEOzxhjAEsQEbdjB1x1latWOjQw7rXXXHHCRk4bY8oQGwcRQaquiSEtDebPd+s+oOrGPpxxBrRvH+UIjTEmX1hLECLSRUR+FZFVIjLYz/GnReQHb1shItt8jvUVkZXe1jeccUbKxImuJumBB+Dss72d8+a50dPWtdUYU8aEbcEgEYkBVgAXAum4JUh7FbYynIjcCnRQ1etEpAGQAiQCCqQCHVV1a2GfV9YXDFq2DBIT4ayzYPZsn1k0rrzSJYn0dKhRI6oxGmMqn2gtGHQasEpVV6vqfmAy0D3A+b2AZO/5xcAcVc3yksIcoEsYYw2rPXtcu0Pt2vD66z7JYcMGmDED/vUvSw7GmDInnG0QTYF1Pq/TgdP9nSgizYEE4LMA1zYNQ4wRMXAgLFkCs2ZBkyY+B15+GQ4etEWBjDFlUlnpxZQEvK2qB4tzkYj0F5EUEUnJzMwMU2ilM3Wq6610zz1w8cU+B3Jy3IGLL4bjjotafMYYU5hwJoj1wDE+r+O9ff4kkV+9FPS1qjpeVRNVNbFRo0alDDf0Vq+G//zHdVB68MECBz/4wC0CYV1bjTFlVDgTxCKglYgkiEg1XBKYWfAkEWkN1Ae+9tk9G7hIROqLSH3gIm9fubF/PyQluSm8k5PdnEuH7NwJQ4a4RYG6do1ajMYYE0jY2iBUNUdEbsH9sMcAE1R1qYiMAFJUNS9ZJAGT1ac7lapmiciDuCQDMEJVs8IVazgMHQqLFsH06dCihc8BVfj3v2HFCpg7F6raUBRjTNkUtm6ukVaWurl+/LFbLfSmm+D55wscHD0aBgyAkSNdw4QxxkRRoG6uliBCbMMGOOUUOPpo+PbbAr1Xv/4azj3XZY9337VZW40xURetcRCV0qBBkJ0NU6YUSA6bN7tBcc2aubmXLDkYY8o4qwAPoZQU1yA9dCi0bu1z4OBB6N0b/vjDlSLq1YtajMYYEyxLECGiCnfdBY0awd13Fzg4fLhrkH7lFejQIRrhGWNMsVmCCJEPP3QztD73HNStW+DAQw/Bdde5zRhjyglrpA6BnBzXMH3gACxd6jPmIS0N/vpXaN4cFi6EmjWjEp8xxhQmUCO1lSBCYOJEN1vr9Ok+yWHvXrjiCsjNhbfftuRgjCl3LEGUUnY23H8/dOoEl13mc2DgQLe26LvvQsuWUYvPGGNKyhJEKY0aBZs2wTvv+PRcnTQJXnzRDYTrHmiGc2OMKbtsHEQpbNoETzwBPXvCmWd6O3/+2a0r2rmza5w2xphyyhJEKQwfDvv2waOPejuysqBHDzfOITnZ5lkyxpRr9gtWQsuXu/V+broJWrXCdWVKSnJLh37+ORx1VLRDNMaYUrEEUUKDB0OtWnDffd6O//0P5sxxWeOMM6IamzHGhIJVMZXAggUwc6ZLEo0a4aqTnnjCFSeuvz7a4RljTEjYQLliys11BYSNG92SDjV/Wez6uCYmwqefQrVqYY/BGGNCxQbKhdDUqW4hoIkToWZ2pmuUjouDadMsORhjKpSwVjGJSBcR+VVEVonI4ELO+aeILBORpSLyls/+gyLyg7cdtlRpNOzb55oa2rWDa3vlwFVXQUYGzJgBjRtHOzxjjAmpsJUgRCQGeB64EEgHFonITFVd5nNOK2AI0ElVt4rIkT5vsUdV24crvpJ44QVYswZmzYKYwXfBvHlubYdEv6UzY4wp18JZgjgNWKWqq1V1PzAZKDis+D/A86q6FUBVN4cxnlLZtg0efBAuvBAuzpgEzzzjptPo0yfaoRljTFiEM0E0Bdb5vE739vk6HjheRL4SkW9EpIvPsRoikuLt7+HvA0Skv3dOSmZmZmijL2DsWNi6FUb3TYH+/eH8813PJWOMqaCi3UhdFWgFdAbigQUi0lZVtwHNVXW9iBwLfCYiP6vqb74Xq+p4YDy4XkzhCnL/fhgzBq48N4PWgy9zg+CmTLGR0saYCi2cJYj1wDE+r+O9fb7SgZmqekBV1wArcAkDVV3vPa4G5gNRW4pt8mTI3HiAcVlXwpYtrlG6UaNohWOMMRERzgSxCGglIgkiUg1IAgr2RnoXV3pARBriqpxWi0h9Eanus78TsIwoUHUztg5tMpEGS76Al16yZUONMZVC2OpIVDVHRG4BZgMxwARVXSoiI4AUVZ3pHbtIRJYBB4G7VHWLiJwFvCgiubgkNtK391MkffYZ/PxTLp83HgUdO0Lv3tEIwxhjIs5GUheha1do+NV7vLa9h6truuqqkH+GMcZEi42kLqHly+Gjj2Btsyegfgu38IMxxlQSliACePppOK/a1zT7/St49lnrtWSMqVTsF68QmZlu5dCFRz8J2+vDdddFOyRjjIkom+67EC+8AMfsW0mHtTPcNN61a0c7JGOMiShLEH7s3QvPPw/PNnsKiY2FW26JdkjGGBNxVsXkx5tvgmZmcnG1V91cS7Z8qDGmErISRAGq8NRT8GDj54nZvxfuvDPaIRljTFRYgihg9mxYs2w3/XY/D926QevW0Q7JGGOiwhJEAaNGwYAjXqPGzj/grruiHY4xxkSNtUH4+Okn+OzTg0xtMApOPx3OPjvaIRljTNRYCcLH00/DP6u9R/2s32DQIBCJdkjGGBM1VoLwbNwIb76h/Br3BNRuCZddFu2QjDEmqqwE4Xn+eTg95ysSMr6BO+6AmJhoh2SMMVFlJQhg9263pOhHRz0JB+KgX79oh2SMMVFnJQjgtdegYdavnJYxE26+Gf7yl2iHZIwxUVfpE0RurmucHtloFFSv7hKEMcaY8CYIEekiIr+KyCoRGVzIOf8UkWUislRE3vLZ31dEVnpb33DFuGYN1Niewf9tm4T07QtHHhmujzLGmHIlbG0QIhIDPA9cCKQDi0Rkpu/SoSLSChgCdFLVrSJypLe/ATAMSAQUSPWu3RrqOFu2hMXXP0eVkfttWg1jjPERzhLEacAqVV2tqvuByUD3Auf8B3g+74dfVTd7+y8G5qhqlndsDtAlLFHu2kXMiy8gPXpAq1Zh+QhjjCmPwpkgmgLrfF6ne/t8HQ8cLyJficg3ItKlGNciIv1FJEVEUjIzM0sW5fbtcMEFbmCcMcaYQ6LdzbUq0AroDMQDC0SkbbAXq+p4YDxAYmKiliiCo4+GqVNLdKkxxlRk4SxBrAeO8Xkd7+3zlQ7MVNUDqroGWIFLGMFca4wxJozCmSAWAa1EJEFEqgFJwMwC57yLKz0gIg1xVU6rgdnARSJSX0TqAxd5+4wxxkRI2KqYVDVHRG7B/bDHABNUdamIjABSVHUm+YlgGXAQuEtVtwCIyIO4JAMwQlWzwhWrMcaYw4lqyaruy5rExERNSUmJdhjGGFOuiEiqqib6O1bpR1IbY4zxzxKEMcYYvyxBGGOM8csShDHGGL8qTCO1iGQCa/0cagj8EeFwQs3uoWyweygb7B5Cq7mqNvJ3oMIkiMKISEphLfTlhd1D2WD3UDbYPUSOVTEZY4zxyxKEMcYYvypDghgf7QBCwO6hbLB7KBvsHiKkwrdBGGOMKZnKUIIwxhhTApYgjDHG+FVhE4SIdBGRX0VklYgMjnY8JSUiaSLys4j8ICLlYjZCEZkgIptFZInPvgYiMkdEVnqP9aMZY1EKuYfhIrLe+y5+EJFLohljUUTkGBGZJyLLRGSpiAzw9peb7yLAPZSb70JEaojIdyLyo3cPD3j7E0TkW+83aoq3LEKZUiHbIEQkBrf40IW4RYkWAb1UdVlUAysBEUkDElW1rAyqKZKInAtkA5NU9WRv3+NAlqqO9BJ2fVW9J5pxBlLIPQwHslX1yWjGFiwRaQI0UdXvRaQOkAr0APpRTr6LAPfwT8rJdyEiAtRS1WwRiQW+BAYAdwDvqOpkERkH/KiqY6MZa0EVtQRxGrBKVVer6n5gMtA9yjFVGqq6ACi4fkd34DXv+Wu4/8nLrELuoVxR1Y2q+r33fCewHLe2e7n5LgLcQ7mhTrb3MtbbFPgb8La3v0x+DxU1QTQF1vm8Tqec/UflQ4FPRCRVRPpHO5hSaKyqG73nm4DG0QymFG4RkZ+8KqgyWzVTkIi0ADoA31JOv4sC9wDl6LsQkRgR+QHYDMwBfgO2qWqOd0qZ/I2qqAmiIjlbVf8K/AO42av6KNfU1WuWx7rNsUBLoD2wERgV3XCCIyK1genAQFXd4XusvHwXfu6hXH0XqnpQVdsD8bgajtZRDikoFTVBrAeO8Xkd7+0rd1R1vfe4GZiB+4+rPMrw6pPz6pU3RzmeYlPVDO9/9FzgJcrBd+HVeU8H3lTVd7zd5eq78HcP5fG7AFDVbcA84EygnojkLftcJn+jKmqCWAS08noJVAOSgJlRjqnYRKSW1zCHiNQCLgKWBL6qzJoJ9PWe9wXei2IsJZL3o+q5jDL+XXiNo68Ay1X1KZ9D5ea7KOweytN3ISKNRKSe97wmrvPMclyiuMI7rUx+DxWyFxOA1+3tGSAGmKCqD0c5pGITkWNxpQaAqsBb5eE+RCQZ6Iyb0jgDGAa8C0wFmuGmZf+nqpbZRuBC7qEzrkpDgTTgvz51+WWOiJwNfAH8DOR6u/+Hq8MvF99FgHvoRTn5LkSkHa4ROgb3R/lUVR3h/f89GWgALAauUdV90Yv0cBU2QRhjjCmdilrFZIwxppQsQRhjjPHLEoQxxhi/LEEYY4zxyxKEMcYYvyxBmEpLROJ8ZgPdVGB20JDPrCki80WkRAvVi0gPEWkTivcyJlhViz7FmIpJVbfg+tL7nalVRKr6zJUTbT2AD4ByNyOxKb+sBGGMDxF5VUTGici3wOMi0lJEZnmTJX4hIq298xqJyHQRWeRtnfy8V00RmSwiy0VkBlDT59hFIvK1iHwvItO8uYby1v94XNwaIN+JyHEichbwf8ATXummpfc2V3rnrBCRc8L+j2MqHStBGHO4eOAsVT0oInOBG1R1pYicDryAm6b5WeBpVf1SRJoBs4ETC7zPjcBuVT3RG037PYCINATuBf6uqrtE5B7c2gAjvOu2q2pbEekDPKOql4rITOADVX3bew+Aqqp6mjdrwDDg7+H6BzGVkyUIYw43zUsOtYGzgGneDzJAde/x70Abn/11RaS2z7z/AOcCowFU9ScR+cnbfwbQBvjKu74a8LXPdck+j08HiDNv8r1UoEXQd2dMkCxBGHO4Xd5jFdyc/e39nFMFOENV95bg/QWYo6q9CjmuhTwvKG/enoPY/8smDKwNwphCeOsOrBGRK8HNLCoip3iHPwFuzTtXRPwlkQVAb+/4yUA7b/83QCcROc47VktEjve57iqfx7ySxU6gTqlvyphisARhTGBXA9eLyI/AUvKXrr0NSPRWNFsG3ODn2rFAbRFZjmtfSAVQ1UzcutDJXrXT1/x5AZn63v4BwO3evsnAXSKy2KeR2piwstlcjSlDRCQNSFTVP6IdizFWgjDGGOOXlSCMMcb4ZSUIY4wxflmCMMYY45clCGOMMX5ZgjDGGOOXJQhjjDF+/T/sMt96ANIitwAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(max_depths, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(max_depths, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"Tree depth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VWM4TxNOc-JK" + }, + "source": [ + "## Selected RF\n", + "From the previous we select settings for our selected RF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3luTaYHWc-JL" + }, + "outputs": [], + "source": [ + "# Random Forest\n", + "rf = RandomForestClassifier(n_estimators=50, max_depth=20, max_features=\"sqrt\")\n", + "# Train Decision Tree Classifer\n", + "rf.fit(X_train,y_train)\n", + "\n", + "#Predict the response for test dataset\n", + "y_pred = rf.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D_O07pdlc-JL" + }, + "source": [ + "## Confusion matrix and classification report" + ] + }, + { + "cell_type": "markdown", + "source": [ + "without resampling" + ], + "metadata": { + "id": "qHp6c3c-JRVs" + } + }, + { + "cell_type": "code", + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S3-6-TzIJL5S", + "outputId": "322475ec-b9f7-494b-ebb4-a4d98f1170e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: drinking\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[128321 21]\n", + " [ 767 169]]\n", + "Accuracy Score : 0.9939046086727826\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 1.00 128342\n", + " 1 0.89 0.18 0.30 936\n", + "\n", + " accuracy 0.99 129278\n", + " macro avg 0.94 0.59 0.65 129278\n", + "weighted avg 0.99 0.99 0.99 129278\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "with resampling" + ], + "metadata": { + "id": "EGXGc4eiJOzS" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "04e534cb-3f76-4cb7-f5c6-85d12fa4e695", + "id": "L00kVPX7c-JL" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: urinating\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[1345 32]\n", + " [ 22 1355]]\n", + "Accuracy Score : 0.9803921568627451\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.98 0.98 0.98 1377\n", + " 1 0.98 0.98 0.98 1377\n", + "\n", + " accuracy 0.98 2754\n", + " macro avg 0.98 0.98 0.98 2754\n", + "weighted avg 0.98 0.98 0.98 2754\n", + "\n" + ] + } + ], + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0-Uch99Sc-JL" + }, + "source": [ + "##Feature importances" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e381d2db-4954-47d5-d2f8-ffd606342900", + "id": "E4B0Src6c-JL" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Feature: 0, Score: 0.02776\n", + "Feature: 1, Score: 0.03776\n", + "Feature: 2, Score: 0.03499\n", + "Feature: 3, Score: 0.03949\n", + "Feature: 4, Score: 0.04147\n", + "Feature: 5, Score: 0.07690\n", + "Feature: 6, Score: 0.07953\n", + "Feature: 7, Score: 0.07173\n", + "Feature: 8, Score: 0.06008\n", + "Feature: 9, Score: 0.06335\n", + "Feature: 10, Score: 0.02206\n", + "Feature: 11, Score: 0.02496\n", + "Feature: 12, Score: 0.02930\n", + "Feature: 13, Score: 0.03450\n", + "Feature: 14, Score: 0.03014\n", + "Feature: 15, Score: 0.08406\n", + "Feature: 16, Score: 0.06953\n", + "Feature: 17, Score: 0.05956\n", + "Feature: 18, Score: 0.05674\n", + "Feature: 19, Score: 0.05608\n", + "Feature: 20, Score: 0.00000\n", + "Feature: 21, Score: 0.00000\n", + "Feature: 22, Score: 0.00000\n", + "Feature: 23, Score: 0.00000\n", + "Feature: 24, Score: 0.00000\n", + "Feature: 25, Score: 0.00000\n", + "Feature: 26, Score: 0.00000\n", + "Feature: 27, Score: 0.00000\n", + "Feature: 28, Score: 0.00000\n", + "Feature: 29, Score: 0.00000\n" + ] + } + ], + "source": [ + "# feature importance\n", + "model = rf\n", + "importance = model.feature_importances_\n", + "# summarize feature importance\n", + "for i,v in enumerate(importance):\n", + "\tprint('Feature: %0d, Score: %.5f' % (i,v))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ceO4oz_N2EIH" + }, + "source": [ + "# RandomForestAlgorithm Sleeping" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EFdJJ2AE3ow9" + }, + "outputs": [], + "source": [ + "BEHAVIOUR='sleeping'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YOpIOHrSsvHE" + }, + "outputs": [], + "source": [ + "y_final = np.stack(signal_features_1[:,3], axis =0) # BEHAVIOUR_1= 'rumination_video'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_seuvyTZ_mUj" + }, + "outputs": [], + "source": [ + "bool_minority_labels = y_final != 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_EppsuEr_mUj" + }, + "outputs": [], + "source": [ + "min_features = X_final[bool_minority_labels]\n", + "maj_features = X_final[~bool_minority_labels]\n", + "\n", + "min_labels = y_final[bool_minority_labels]\n", + "maj_labels = y_final[~bool_minority_labels]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ap6aEYW-_mUj" + }, + "source": [ + "Split the minority class in 70 30 to have more in train set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Skh0o7kg_mUj" + }, + "outputs": [], + "source": [ + "# Split minority dataset into training set and test set\n", + "X_min_train, X_min_test, y_min_train, y_min_test = train_test_split(min_features, min_labels, test_size=0.3, random_state=1) # 70% training and 30% test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zKNbr9Nu_mUj" + }, + "source": [ + "Split the majority class in 50 50 so we can sample from independant sets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5yi3qXT1_mUj" + }, + "outputs": [], + "source": [ + "# Split minority dataset into training set and test set\n", + "X_maj_train, X_maj_test, y_maj_train, y_maj_test = train_test_split(maj_features, maj_labels, test_size=0.5, random_state=1) # 50% training and 50% test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fmCTZNzg_mUk" + }, + "source": [ + "## Create train set" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ltPgTmcT_mUk" + }, + "source": [ + "Upsample the amount of minority features and labels 3 times (why 3, no idea, feels that this will create enough labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i7Jwg8rr_mUk" + }, + "outputs": [], + "source": [ + "upsampled_X_min_train = np.concatenate([X_min_train, X_min_train, X_min_train], axis=0)\n", + "upsampled_y_min_train = np.concatenate([y_min_train, y_min_train, y_min_train], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "69yarUqk_mUk" + }, + "source": [ + "Downsample from 50% of the majority class to have same amount of samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6ae7b416-b858-48a2-8282-253238368883", + "id": "YI-Vrft3_mUk" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(7125, 30)" + ] + }, + "metadata": {}, + "execution_count": 144 + } + ], + "source": [ + "ids = np.arange(len(X_maj_train))\n", + "choices = np.random.choice(ids, len(upsampled_X_min_train))\n", + "\n", + "res_maj_train_features = X_maj_train[choices]\n", + "res_maj_train_labels = y_maj_train[choices]\n", + "\n", + "res_maj_train_features.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-KNamBHi_mUk" + }, + "source": [ + "Concat together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cY9CMFRT_mUl" + }, + "outputs": [], + "source": [ + "resampled_train_features = np.concatenate([upsampled_X_min_train, res_maj_train_features], axis=0)\n", + "resampled_train_labels = np.concatenate([upsampled_y_min_train, res_maj_train_labels], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tZw7mUAV_mUl" + }, + "source": [ + "Shuffle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KNBBU1kM_mUl" + }, + "outputs": [], + "source": [ + "order = np.arange(len(resampled_train_labels))\n", + "np.random.shuffle(order)\n", + "resampled_train_features = resampled_train_features[order]\n", + "resampled_train_labels = resampled_train_labels[order]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0GSO5mdj_mUl" + }, + "source": [ + "## Create test set" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HqGBMIvn_mUl" + }, + "source": [ + "We start from the X_min_test" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tmbNhkPH_mUl" + }, + "source": [ + "And downsample the remaining part of the majority class which we didn't use so far" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_qQ2LHYk_mUl" + }, + "outputs": [], + "source": [ + "ids = np.arange(len(X_maj_test))\n", + "choices = np.random.choice(ids, len(X_min_test))\n", + "\n", + "res_maj_test_features = X_maj_test[choices]\n", + "res_maj_test_labels = y_maj_test[choices]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wu5KaKub_mUl" + }, + "source": [ + "Concat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aEIQMSkb_mUm" + }, + "outputs": [], + "source": [ + "resampled_test_features = np.concatenate([X_min_test, res_maj_test_features], axis=0)\n", + "resampled_test_labels = np.concatenate([y_min_test, res_maj_test_labels], axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s5byMjMF_mUm" + }, + "source": [ + "Shuffle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L1uXJCCV_mUm" + }, + "outputs": [], + "source": [ + "order = np.arange(len(resampled_test_labels))\n", + "np.random.shuffle(order)\n", + "resampled_test_features = resampled_test_features[order]\n", + "resampled_test_labels = resampled_test_labels[order]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w8udUQsV_mUm" + }, + "outputs": [], + "source": [ + "X_train= resampled_train_features\n", + "y_train= resampled_train_labels\n", + "X_test= resampled_test_features\n", + "y_test= resampled_test_labels" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e3NwABt4svHF" + }, + "source": [ + "## Test settings RandomForestAlgorithm" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PEnU2Mn4svHF" + }, + "source": [ + "\n", + "### Test number of trees\n", + "Adopted from https://medium.com/all-things-ai/in-depth-parameter-tuning-for-random-forest-d67bb7e920d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9af7257e-0fab-4875-e275-7c3b052d48cb", + "id": "R5dMXFqnsvHF" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "2\n", + "4\n", + "8\n", + "16\n", + "32\n", + "64\n", + "100\n", + "200\n" + ] + } + ], + "source": [ + "n_estimators = [1, 2, 4, 8, 16, 32, 64, 100, 200]\n", + "train_results = []\n", + "test_results = []\n", + "for estimator in n_estimators:\n", + " print(estimator)\n", + " rf = RandomForestClassifier(n_estimators=estimator, max_features=\"sqrt\")\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(n_estimators, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(n_estimators, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"n_estimators\")\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 280 + }, + "outputId": "49a47e41-0336-4575-e5a0-01d6824d4010", + "id": "hZcAI-kesvHF" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ymQ-Mg_CsvHG" + }, + "source": [ + "### Test max depth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b20d60cd-eb27-49cc-b143-b52206087767", + "id": "a88e3feGsvHG" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.0\n", + "2.0\n", + "3.0\n", + "4.0\n", + "5.0\n", + "6.0\n", + "7.0\n", + "8.0\n", + "9.0\n", + "10.0\n", + "11.0\n", + "12.0\n", + "13.0\n", + "14.0\n", + "15.0\n", + "16.0\n", + "17.0\n", + "18.0\n", + "19.0\n", + "20.0\n", + "21.0\n", + "22.0\n", + "23.0\n", + "24.0\n", + "25.0\n", + "26.0\n", + "27.0\n", + "28.0\n", + "29.0\n", + "30.0\n" + ] + } + ], + "source": [ + "max_depths = np.linspace(1, 30, 30, endpoint=True)\n", + "train_results = []\n", + "test_results = []\n", + "for max_depth in max_depths:\n", + " print(max_depth)\n", + " rf = RandomForestClassifier(max_depth=max_depth, max_features=\"sqrt\", n_estimators = 20)\n", + " rf.fit(X_train, y_train)\n", + " train_pred = rf.predict(X_train)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_train, train_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " train_results.append(roc_auc)\n", + " y_pred = rf.predict(X_test)\n", + " false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_pred)\n", + " roc_auc = auc(false_positive_rate, true_positive_rate)\n", + " test_results.append(roc_auc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "outputId": "3521d570-5190-4c47-9b4a-6f90cc2d0e14", + "id": "7HxFog3osvHG" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "from matplotlib.legend_handler import HandlerLine2D\n", + "line1, = plt.plot(max_depths, train_results, \"b\", label=\"Train AUC\")\n", + "line2, = plt.plot(max_depths, test_results, \"r\", label=\"Test AUC\")\n", + "plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})\n", + "plt.ylabel(\"AUC score\")\n", + "plt.xlabel(\"Tree depth\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Es_farEcsvHG" + }, + "source": [ + "## Selected RF\n", + "From the previous we select settings for our selected RF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vNtCYve2svHH" + }, + "outputs": [], + "source": [ + "# Random Forest\n", + "rf = RandomForestClassifier(n_estimators=30, max_depth=25, max_features=\"sqrt\")\n", + "# Train Decision Tree Classifer\n", + "rf.fit(X_train,y_train)\n", + "\n", + "#Predict the response for test dataset\n", + "y_pred = rf.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GVJF1M-6svHH" + }, + "source": [ + "## Confusion matrix and classification report" + ] + }, + { + "cell_type": "markdown", + "source": [ + "without resampling" + ], + "metadata": { + "id": "WwJGmg-vJmTV" + } + }, + { + "cell_type": "code", + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FLVvZUORJc5b", + "outputId": "123d9295-236d-4dd5-b60c-7838a40d1540" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: sleeping\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[128317 25]\n", + " [ 635 301]]\n", + "Accuracy Score : 0.9948947230000464\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 1.00 1.00 1.00 128342\n", + " 1 0.92 0.32 0.48 936\n", + "\n", + " accuracy 0.99 129278\n", + " macro avg 0.96 0.66 0.74 129278\n", + "weighted avg 0.99 0.99 0.99 129278\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "with resampling" + ], + "metadata": { + "id": "K5FCBO0iJbNy" + } + }, + { + "cell_type": "code", + "source": [ + "### Evaluation\n", + "print(\"Behaviour: \"+ str(BEHAVIOUR))\n", + "print (\"Windows_size: \" + str(WIN_SIZE/2) +\" sec \" \"(with overlap: \"+ str(WIN_OVERLAP/2)+ \" sec)\")\n", + "\n", + "results = confusion_matrix(y_test,y_pred)\n", + "\n", + "print('Confusion Matrix Validation:')\n", + "print(results)\n", + "print('Accuracy Score : ', accuracy_score(y_test, y_pred))\n", + "print('Report : ')\n", + "print(classification_report(y_test, y_pred))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "8b41438a-36dc-453b-d6c7-f2497865b62f", + "id": "KNGGVZ5BsvHH" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Behaviour: sleeping\n", + "Windows_size: 120.0 sec (with overlap: 0.5 sec)\n", + "Confusion Matrix Validation:\n", + "[[1014 5]\n", + " [ 9 1010]]\n", + "Accuracy Score : 0.9931305201177625\n", + "Report : \n", + " precision recall f1-score support\n", + "\n", + " 0 0.99 1.00 0.99 1019\n", + " 1 1.00 0.99 0.99 1019\n", + "\n", + " accuracy 0.99 2038\n", + " macro avg 0.99 0.99 0.99 2038\n", + "weighted avg 0.99 0.99 0.99 2038\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5999e508-a35d-41e5-ad0c-b31412741b49", + "id": "yLASMtoVBzU7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Feature: 0, Score: 0.02129\n", + "Feature: 1, Score: 0.04114\n", + "Feature: 2, Score: 0.02506\n", + "Feature: 3, Score: 0.03261\n", + "Feature: 4, Score: 0.05568\n", + "Feature: 5, Score: 0.07470\n", + "Feature: 6, Score: 0.10356\n", + "Feature: 7, Score: 0.06482\n", + "Feature: 8, Score: 0.05784\n", + "Feature: 9, Score: 0.05600\n", + "Feature: 10, Score: 0.01779\n", + "Feature: 11, Score: 0.02446\n", + "Feature: 12, Score: 0.02423\n", + "Feature: 13, Score: 0.03230\n", + "Feature: 14, Score: 0.03214\n", + "Feature: 15, Score: 0.06984\n", + "Feature: 16, Score: 0.07154\n", + "Feature: 17, Score: 0.06244\n", + "Feature: 18, Score: 0.06634\n", + "Feature: 19, Score: 0.06623\n", + "Feature: 20, Score: 0.00000\n", + "Feature: 21, Score: 0.00000\n", + "Feature: 22, Score: 0.00000\n", + "Feature: 23, Score: 0.00000\n", + "Feature: 24, Score: 0.00000\n", + "Feature: 25, Score: 0.00000\n", + "Feature: 26, Score: 0.00000\n", + "Feature: 27, Score: 0.00000\n", + "Feature: 28, Score: 0.00000\n", + "Feature: 29, Score: 0.00000\n" + ] + } + ], + "source": [ + "# feature importance\n", + "model = rf\n", + "importance = model.feature_importances_\n", + "# summarize feature importance\n", + "for i,v in enumerate(importance):\n", + "\tprint('Feature: %0d, Score: %.5f' % (i,v))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TDTHlLika75i" + }, + "source": [ + "# Descriptives" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c0bPYEvp2inT" + }, + "source": [ + "## Counting data PeakDetectionAlgorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OpEh0QcVa1s1" + }, + "outputs": [], + "source": [ + "final_data_peak_detection.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i-LVt3vzox2L" + }, + "source": [ + "##Counting data RandomForestAlgorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v6pTcov0o82N" + }, + "outputs": [], + "source": [ + "final_data_rf.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A66gTdkKdilR" + }, + "source": [ + "## Number of observations video per behavior" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yPBhYGSvqCzk" + }, + "outputs": [], + "source": [ + "np.unique(final_data_peak_detection['behaviour'], return_counts=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8cX6v6jKeMMN" + }, + "source": [ + "## Number of observations video per animal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nipbuDNyeQyy" + }, + "outputs": [], + "source": [ + "np.unique(final_data_peak_detection['cow_number'], return_counts=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5_lyl4Jee5kd" + }, + "source": [ + "## Time between two contraction cycles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V9hyOD4iKyd3" + }, + "outputs": [], + "source": [ + "final_data_rum= final_data_peak_detection[final_data_peak_detection.behaviour==1]\n", + "final_data_eat= final_data_peak_detection[final_data_peak_detection.behaviour==2]\n", + "final_data_drink= final_data_peak_detection[final_data_peak_detection.behaviour==3]\n", + "final_data_sleep= final_data_peak_detection[final_data_peak_detection.behaviour==4]\n", + "final_data_other= final_data_peak_detection[final_data_peak_detection.behaviour==5]\n", + "\n", + "print(\"Time between contraction cycles during rumination:\")\n", + "print(\"mean:\"+str(final_data_rum['low_time'].mean()))\n", + "print(\"std:\"+str(final_data_rum['low_time'].std()))\n", + "print(\"median:\"+str(final_data_rum['low_time'].median()))\n", + "print(\"iqr:\"+ str(iqr(final_data_rum['low_time']))+\"\\n\")\n", + "\n", + "print(\"Time between contraction cycles during eating:\")\n", + "print(\"mean:\"+str(final_data_eat['low_time'].mean()))\n", + "print(\"std:\"+str(final_data_eat['low_time'].std()))\n", + "print(\"median:\"+str(final_data_eat['low_time'].median()))\n", + "print(\"iqr:\"+ str(iqr(final_data_eat['low_time']))+\"\\n\")\n", + "\n", + "print(\"Time between contraction cycles during drinking:\")\n", + "print(\"mean:\"+str(final_data_drink['low_time'].mean()))\n", + "print(\"std:\"+str(final_data_drink['low_time'].std()))\n", + "print(\"median:\"+str(final_data_drink['low_time'].median()))\n", + "print(\"iqr:\"+ str(iqr(final_data_drink['low_time']))+\"\\n\")\n", + "\n", + "print(\"Time between contraction cycles during sleeping:\")\n", + "print(\"mean:\"+str(final_data_sleep['low_time'].mean()))\n", + "print(\"std:\"+str(final_data_sleep['low_time'].std()))\n", + "print(\"median:\"+str(final_data_sleep['low_time'].median()))\n", + "print(\"iqr:\"+ str(iqr(final_data_sleep['low_time']))+\"\\n\")\n", + "\n", + "print(\"Time between contraction cycles during other behaviour:\")\n", + "print(\"mean:\"+str(final_data_other['low_time'].mean()))\n", + "print(\"std:\"+str(final_data_other['low_time'].std()))\n", + "print(\"median:\"+str(final_data_other['low_time'].median()))\n", + "print(\"iqr:\"+ str(iqr(final_data_other['low_time']))+\"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rwhIalEF3upw" + }, + "source": [ + "## Boxplots of time between two contraction cycles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DuLtGgqCrH-8" + }, + "outputs": [], + "source": [ + "sns.boxplot(x=\"behaviour\", y=\"low_time\",orient='v', data=final_data_peak_detection_corrected, showfliers=False)\n", + "plt.xlabel('Behaviour')\n", + "plt.ylabel('Time interval between contractions (s)')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KfUcr61726oD" + }, + "source": [ + "## GLM model eating and rumination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FHg0fxq5lxiI" + }, + "outputs": [], + "source": [ + "#create csv for GLM model\n", + "final_data_HK_EAT_REST = final_data_peak_detection_corrected[final_data_peak_detection_corrected.behaviour<3.0]\n", + "final_data_HK_EAT= final_data_HK_EAT_REST[final_data_peak_detection_corrected.behaviour>=1.0]\n", + "result_file='/content/gdrive/Shared drives/Bovi-Analytics/Projects/JosjeScheurwater/ProjectPicoLog/data/out/final_data_HK_EAT.csv'\n", + "final_data_HK_EAT.to_csv(result_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h5Qs49vTEoHh" + }, + "outputs": [], + "source": [ + "%load_ext rpy2.ipython" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "685HmbZfzN7H" + }, + "outputs": [], + "source": [ + "%%R\n", + "url = ('/content/gdrive/Shareddrives/Bovi-Analytics/Projects/JosjeScheurwater/ProjectPicoLog/data/out/final_data_HK_EAT.csv')\n", + "dataset = read.csv(url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xlT1I1MCvjNd" + }, + "outputs": [], + "source": [ + "%%R\n", + "package_list <- c(\"dplyr\",\n", + " \"lme4\"\n", + " )\n", + "\n", + "for (pkg in package_list) {\n", + " if (pkg %in% rownames(installed.packages()) == FALSE)\n", + " {install.packages(pkg, dependencies = TRUE)}\n", + " if (pkg %in% rownames(.packages()) == FALSE)\n", + " {library(pkg, character.only = TRUE)}\n", + " print(citation(pkg))\n", + "\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1yRAR2qOvjNk" + }, + "outputs": [], + "source": [ + "%%R\n", + "install.packages(\"lmerTest\", dependencies = TRUE)\n", + "library (\"lmerTest\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fNQlFbR9vjNl" + }, + "outputs": [], + "source": [ + "%%R\n", + "gl<- glm('low_time ~ factor(behaviour)+factor(cow_number)', data = dataset, family = gaussian)\n", + "summary(gl)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true, + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file