check in notebook code #23

Merged: 18 commits, Jul 24, 2021

Commits (18)
5cfc44c
check in notebook code
corinne-hcr Jun 2, 2021
8cdf71e
update notebook
corinne-hcr Jun 13, 2021
ceccf02
update notebook code, the previous one was not the latest one
corinne-hcr Jun 13, 2021
109932b
add some functions
corinne-hcr Jun 13, 2021
dd7eb30
refactor evaluation_pipeline code, pull out get_socre function from g…
corinne-hcr Jul 8, 2021
0d3fcd4
add kmeans after running hierarchical clustering for re-building the …
corinne-hcr Jul 10, 2021
8b211f4
adding kmeans only in the test step
corinne-hcr Jul 10, 2021
cd25861
check in codes for generating result for Gabriel's function, will mov…
corinne-hcr Jul 16, 2021
0bcbf5b
change the line of importing label_processing
corinne-hcr Jul 16, 2021
a5fe5c6
address the problems from the previous commit, but has not yet done …
corinne-hcr Jul 16, 2021
102b9bd
Update tour_model_eval/build_save_model.py
corinne-hcr Jul 19, 2021
4d0cbdd
refactored notebook code, not done with plot
corinne-hcr Jul 19, 2021
6eb56b1
update build_save_model according to notebook refactoring
corinne-hcr Jul 19, 2021
1fba74a
Merge branch 'notebook_code' of https://github.com/corinne-hcr/e-miss…
corinne-hcr Jul 19, 2021
bfd6ba6
check in the changes in the notebook, have put the original clusterin…
corinne-hcr Jul 19, 2021
a6851c6
delete output from the notebook
corinne-hcr Jul 19, 2021
3d2dff0
modify test notebook and add comments on it, remove extraneous files
corinne-hcr Jul 22, 2021
22a427b
add plot code, read filename directly from user id, add another way t…
corinne-hcr Jul 24, 2021
179 changes: 179 additions & 0 deletions tour_model_eval/first_second_round_evaluation.ipynb
@@ -0,0 +1,179 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "mighty-ukraine",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"storage not configured, falling back to sample, default configuration\n",
"Connecting to database URL localhost\n"
]
}
],
"source": [
"# import logging\n",
"import emission.core.get_database as edb\n",
"import emission.analysis.modelling.tour_model.similarity as similarity\n",
"import pandas as pd\n",
"import numpy as np\n",
"import get_request_percentage as grp\n",
"import get_scores as gs\n",
"import label_processing as lp\n",
"import get_users as gu\n",
"import data_preprocessing as preprocess\n",
"import get_tuning_score as tuning\n",
"import evaluation_pipeline as ep\n",
"import matplotlib.pyplot as plt\n",
"import get_plot as plot\n",
"import emission.core.common as ecc"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cathedral-pointer",
"metadata": {},
"outputs": [],
"source": [
"participant_uuid_obj = list(edb.get_profile_db().find({\"install_group\": \"participant\"}, {\"user_id\": 1, \"_id\": 0}))\n",
"all_users = [u[\"user_id\"] for u in participant_uuid_obj]"
Contributor comment on lines +33 to +34:

@corinne-hcr The participant UUID check was specific to the CanBikeCO mini-pilot - it is not true for other programs or datasets. I have created a new mongodump of only the participant data, you should load that instead and change this to the call from emission.storage.timeseries.abstract_timeseries
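
A minimal sketch of the suggested change, assuming `emission.storage.timeseries.abstract_timeseries` exposes a `TimeSeries.get_uuid_list()` helper (confirm against the actual e-mission-server API before using):

```python
# Hedged sketch: replace the profile-db "install_group": "participant" query,
# which is specific to the CanBikeCO mini-pilot, with the generic UUID listing
# from the abstract timeseries so the notebook works for other datasets.
# get_uuid_list() is assumed here; adjust if the helper is named differently.
import emission.storage.timeseries.abstract_timeseries as esta

all_users = esta.TimeSeries.get_uuid_list()
```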

]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "exotic-livestock",
"metadata": {},
"outputs": [],
"source": [
"radius = 100"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "powered-airfare",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"user 1 filter_trips len 207\n"
]
}
],
"source": [
"# get all/valid user list\n",
"user_ls, valid_users = gu.get_user_ls(all_users, radius)\n",
"\n",
"# collect request percentage for the first or second round (requested trips / total trips) for all users\n",
"all_percentage_first_tune = []\n",
"all_percentage_first_test = []\n",
"all_percentage_second_tune = []\n",
"all_percentage_second_test = []\n",
"\n",
"# collect homogeneity score for the first/second round for all users\n",
"all_homogeneity_score_first_tune = []\n",
"all_homogeneity_score_first_test = []\n",
"all_homogeneity_score_second_tune = []\n",
"all_homogeneity_score_second_test = []\n",
"\n",
"for a in range(len(all_users)):\n",
" user = all_users[a]\n",
" trips = preprocess.read_data(user)\n",
" filter_trips = preprocess.filter_data(trips, radius)\n",
" print('user', a + 1, 'filter_trips len', len(filter_trips))\n",
"\n",
" # filter out users that don't have enough valid labeled trips\n",
" if not gu.valid_user(filter_trips, trips):\n",
" continue\n",
" tune_idx, test_idx = preprocess.split_data(filter_trips)\n",
"\n",
" # choose tuning/test set to run the model\n",
" # this step will use KFold (5 splits) to split the data into different subsets\n",
" # - tune: tuning set\n",
" # - test: test set\n",
" # Here we user a bigger part of the data for testing and a smaller part for tuning\n",
" tune_data = preprocess.get_subdata(filter_trips, test_idx)\n",
" test_data = preprocess.get_subdata(filter_trips, tune_idx)\n",
"\n",
" pct_collect_first, homo_collect_first, pct_collect_second, homo_collect_second = ep.init_score()\n",
Contributor comment:

If you have so much state, it seems like it would be better to have a class that encapsulates it.
But it also seems like overkill to create a function just to initialize the score.
It seems like you have not finished the more fundamental refactoring.
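
One possible shape for such a class (a sketch with hypothetical names like `RoundScores` and `EvaluationScores`, not the actual refactor):

```python
# Hedged sketch: bundle the per-round percentage/homogeneity collections into
# one object instead of passing eight parallel lists through every call.
class RoundScores:
    def __init__(self):
        self.request_pct = []   # requested trips / total trips per split
        self.homogeneity = []   # homogeneity score per split

class EvaluationScores:
    def __init__(self):
        self.first = RoundScores()    # first round of clustering
        self.second = RoundScores()   # second round of clustering

# Example usage:
scores = EvaluationScores()
scores.first.request_pct.append(0.4)
scores.first.homogeneity.append(0.9)
```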

" \n",
" # collect tuning parameters\n",
" coll_tune_score = []\n",
" coll_tradeoffs = []\n",
" # tuning\n",
" pct_collect_first,homo_collect_first,pct_collect_second,homo_collect_second,coll_tradeoffs, coll_tune_score= ep.tuning_test(tune_data,radius,pct_collect_first,homo_collect_first,pct_collect_second,homo_collect_second,coll_tradeoffs,coll_tune_score,tune = True)\n",
shankari marked this conversation as resolved.
"\n",
" pct_collect_first, homo_collect_first, pct_collect_second, homo_collect_second = ep.init_score()\n",
" # testing\n",
" pct_collect_first,homo_collect_first,pct_collect_second,homo_collect_second,coll_tradeoffs,coll_tune_score = ep.tuning_test(test_data,radius, pct_collect_first, homo_collect_first,pct_collect_second,homo_collect_second, coll_tradeoffs,coll_tune_score,test=True)\n",
Contributor comment:

Why do we return coll_tradeoffs and coll_tune_score from the testing invocation?

Contributor Author reply:

At this point, I am not sure if we still need the tradeoffs after the clustering, so I am keeping them.
I am still figuring out how to predict the labels.

"\n",
" print('colle_tune_score ', coll_tune_score)\n",
" print('coll_tradeoffs',coll_tradeoffs)\n",
"\n",
" # collect request percentage for the first round for all users\n",
" all_percentage_first_test.append(pct_collect_first)\n",
"\n",
" # collect homogeneity score for the first round for all users\n",
" all_homogeneity_score_first_test.append(homo_collect_first)\n",
"\n",
" # collect request percentage for the second round for all users\n",
" all_percentage_second_test.append(pct_collect_second)\n",
"\n",
" # collect homogeneity score for the second round for all users\n",
" all_homogeneity_score_second_test.append(homo_collect_second)\n",
"\n",
"print('all_percentage_first_test', all_percentage_first_test)\n",
"print('all_homogeneity_score_first_test', all_homogeneity_score_first_test)\n",
"print('all_percentage_second_test', all_percentage_second_test)\n",
"print('all_homogeneity_score_second_test', all_homogeneity_score_second_test)\n",
"\n",
"# plot evaluation scatter for the first round\n",
"plt.figure()\n",
"plot.get_scatter(all_percentage_first_test, all_homogeneity_score_first_test, valid_users)\n",
"\n",
"# plot evaluation scatter for the second round\n",
"plt.figure()\n",
"plot.get_scatter(all_percentage_second_test, all_homogeneity_score_second_test, valid_users)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "variable-faculty",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}