diff --git a/compile.py b/compile.py index 4707f33..81df742 100644 --- a/compile.py +++ b/compile.py @@ -67,11 +67,11 @@ def run_notebooks( nb_in = nbparam.replace_definitions(nb_in, new_params) # execute it - ep = ExecutePreprocessor(timeout=600, kernel_name="python3") + ep = ExecutePreprocessor(timeout=1000, kernel_name="python3") ep.preprocess(nb_in) # strip out all metadata that causes issues for Quarto - cmp = ClearMetadataPreprocessor(timeout=600, kernel_name="python3") + cmp = ClearMetadataPreprocessor(timeout=1000, kernel_name="python3") # exclude cell tags from removal cmp.preserve_cell_metadata_mask |= {"tags"} cmp.preprocess(nb_in, resources={}) diff --git a/notebooks/daily.ipynb b/notebooks/daily.ipynb index 0258718..541a526 100644 --- a/notebooks/daily.ipynb +++ b/notebooks/daily.ipynb @@ -139,10 +139,10 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Filtering persons by home MAZ. Original Persons: {len(base_persons_df)}\")\n", + "print(f\"Filtering all persons by home MAZ. Original persons: {len(base_persons_df)}\")\n", "base_persons_df = base_persons_df[single_filter_mazs(base_persons_df.home_zone_id)]\n", "build_persons_df = build_persons_df[single_filter_mazs(build_persons_df.home_zone_id)]\n", - "print(f\"Persons after filtering: {len(base_persons_df)}\")" + "print(f\"Total persons after filtering: {len(base_persons_df)}\")" ] }, { @@ -218,10 +218,10 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Filtering persons by home MAZ. Original Persons: {len(base_persons_df)}\")\n", + "print(f\"Filtering workers and students by home MAZ. Original workers/students: {len(base_persons_df)}\")\n", "base_persons_df = base_persons_df[single_filter_mazs(base_persons_df.home_zone_id)]\n", "build_persons_df = build_persons_df[single_filter_mazs(build_persons_df.home_zone_id)]\n", - "print(f\"Persons after filtering: {len(base_persons_df)}\")" + "print(f\"Total workers/students after filtering: {len(base_persons_df)}\")" ] }, { @@ -334,10 +334,10 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Filtering persons by home MAZ. Original Persons: {len(base_persons_df)}\")\n", + "print(f\"Filtering all persons by home MAZ. Original persons: {len(base_persons_df)}\")\n", "base_persons_df = base_persons_df[single_filter_mazs(base_persons_df.home_zone_id)]\n", "build_persons_df = build_persons_df[single_filter_mazs(build_persons_df.home_zone_id)]\n", - "print(f\"Persons after filtering: {len(base_persons_df)}\")" + "print(f\"Total persons after filtering: {len(base_persons_df)}\")" ] }, { diff --git a/notebooks/joint.ipynb b/notebooks/joint.ipynb index 60eb573..da8f7cd 100644 --- a/notebooks/joint.ipynb +++ b/notebooks/joint.ipynb @@ -124,9 +124,9 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"\"\"Filtering tours by origin {\n", + "print(f\"\"\"Filtering joint tours by origin {\n", " 'and' if how_method == 'all' else 'or' if how_method == 'any' else '???'\n", - " } destination MAZ.\\nOriginal tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\"\"\")\n", + " } destination MAZ.\\nOriginal joint tours in base: {len(base_tour[base_tour.number_of_participants > 1])}\\tbuild: {len(build_tour[build_tour.number_of_participants > 1])}\"\"\")\n", "\n", "base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])] # base tour in the filtered set\n", "\n", @@ -139,7 +139,7 @@ " & multi_filter_mazs([build_tour.origin, build_tour.destination]) # and it's in this set\n", " )]\n", "\n", - "print(f\"After filtering, tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\")" + "print(f\"After filtering, joint tours in base: {len(base_tour[base_tour.number_of_participants > 1])}\\tbuild: {len(build_tour[build_tour.number_of_participants > 1])}\")" ] }, { @@ -389,7 +389,7 @@ " left_index=True,\n", " right_index=True,\n", " suffixes=('_base', '_build'),\n", - " how='inner'\n", + " how='outer'\n", ")\n", "\n", "# get the difference in start and end times\n", @@ -448,18 +448,19 @@ " 3: \"PM\",\n", " 4: \"EV\"\n", "}\n", - "tp_order = ['EA','AM','MD','PM','EV','Total']\n", + "tp_order_base = ['EA','AM','MD','PM','EV','Newly created','Total']\n", + "tp_order_build = ['EA','AM','MD','PM','EV','Removed','Total']\n", "\n", "for metric in ['start', 'end']:\n", " purpose_df = df\n", " display(Markdown(f\"### Joint tour {metric} changes\"))\n", " xtab = pd.crosstab(\n", - " purpose_df[f'{metric}_period_base'].replace(mapper),\n", - " purpose_df[f'{metric}_period_build'].replace(mapper),\n", + " purpose_df[f'{metric}_period_base'].replace(mapper).fillna('Newly created'),\n", + " purpose_df[f'{metric}_period_build'].replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", " )\n", - " display(xtab.loc[tp_order,tp_order])\n", + " display(xtab.loc[tp_order_base,tp_order_build])\n", "\n", " display(Markdown(\" \"))\n", "\n" diff --git a/notebooks/long_term.ipynb b/notebooks/long_term.ipynb index ac3eda1..6290c00 100644 --- a/notebooks/long_term.ipynb +++ b/notebooks/long_term.ipynb @@ -25,6 +25,7 @@ "import pandas as pd\n", "import yaml\n", "import plotly.io as pio\n", + "import numpy as np\n", "pio.renderers.default = \"plotly_mimetype+notebook_connected\"\n", "\n", "from IPython.display import Markdown, display\n", @@ -185,10 +186,10 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Filtering persons by home MAZ. Original Persons: {len(base_per)}\")\n", + "print(f\"Filtering workers by home MAZ. Original workers: {len(base_per[base_per.is_worker])}\")\n", "base_per = base_per[single_filter_mazs(base_per.home_zone_id)]\n", "build_per = build_per[single_filter_mazs(build_per.home_zone_id)]\n", - "print(f\"Persons after filtering: {len(base_per)}\")" + "print(f\"Workers after filtering: {len(base_per[base_per.is_worker])}\")" ] }, { @@ -238,10 +239,10 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Filtering persons by home MAZ. Original Persons: {len(base_per)}\")\n", + "print(f\"Filtering workers by home MAZ. Original workers: {len(base_per[base_per.is_worker])}\")\n", "base_per = base_per[single_filter_mazs(base_per.home_zone_id)]\n", "build_per = build_per[single_filter_mazs(build_per.home_zone_id)]\n", - "print(f\"Persons after filtering: {len(base_per)}\")" + "print(f\"Workers after filtering: {len(base_per[base_per.is_worker])}\")" ] }, { @@ -303,10 +304,10 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Filtering persons by home MAZ. Original Persons: {len(base_per)}\")\n", + "print(f\"Filtering students by home MAZ. Original students: {len(base_per[base_per.is_student])}\")\n", "base_per = base_per[single_filter_mazs(base_per.home_zone_id)]\n", "build_per = build_per[single_filter_mazs(build_per.home_zone_id)]\n", - "print(f\"Persons after filtering: {len(base_per)}\")" + "print(f\"Students after filtering: {len(base_per[base_per.is_student])}\")" ] }, { @@ -370,10 +371,10 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Filtering persons by home MAZ. Original Persons: {len(base_per)}\")\n", + "print(f\"Filtering all persons by home MAZ. Original persons: {len(base_per)}\")\n", "base_per = base_per[single_filter_mazs(base_per.home_zone_id)]\n", "build_per = build_per[single_filter_mazs(build_per.home_zone_id)]\n", - "print(f\"Persons after filtering: {len(base_per)}\")" + "print(f\"Total persons after filtering: {len(base_per)}\")" ] }, { @@ -531,10 +532,10 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Filtering persons by workplace MAZ. Original Persons: {len(base_per)}\")\n", + "print(f\"Filtering workers by workplace MAZ. Original workers in base scenario: {len(base_per[base_per.is_worker])}\")\n", "base_per = base_per[single_filter_mazs(base_per.workplace_zone_id)]\n", "build_per = build_per[single_filter_mazs(build_per.workplace_zone_id)]\n", - "print(f\"Persons after filtering: {len(base_per)}\")" + "print(f\"Workers in base scenario after filtering: {len(base_per[base_per.is_worker])}\")" ] }, { @@ -545,7 +546,7 @@ "outputs": [], "source": [ "df = base_per[base_per.is_worker].merge(build_per[build_per.is_worker],\n", - " how='left',\n", + " how='outer',\n", " left_index=True,\n", " right_index=True, \n", " suffixes=('_base','_build'))" @@ -559,12 +560,13 @@ "outputs": [], "source": [ "df0 = pd.crosstab(\n", - " base_per[base_per.is_worker].free_parking_at_work,\n", - " build_per[build_per.is_worker].free_parking_at_work,\n", + " df.free_parking_at_work_base.map({False:\"No free parking\",True:\"Has free parking\",np.nan:\"Workplace moved into area\"}),\n", + " df.free_parking_at_work_build.map({False:\"No free parking\",True:\"Has free parking\",np.nan:\"Workplace moved out of area\"}),\n", " rownames=['base'],\n", " colnames=['build'],\n", " margins=True,\n", - " margins_name='Total'\n", + " margins_name='Total',\n", + " dropna=False,\n", ")\n", "df0" ] @@ -576,9 +578,13 @@ "metadata": {}, "outputs": [], "source": [ - "df = (df.free_parking_at_work_base == df.free_parking_at_work_build).value_counts()\n", - "df.index = df.index.map({True:'Unchanged',False:'Changed'})\n", - "df" + "df.loc[df.free_parking_at_work_base == df.free_parking_at_work_build,\"Case\"] = \"Unchanged\"\n", + "df.loc[df.free_parking_at_work_base != df.free_parking_at_work_build,\"Case\"] = \"Changed\"\n", + "\n", + "df.loc[df.free_parking_at_work_base.isna(),\"Case\"] = \"Workplace moved into area\"\n", + "df.loc[df.free_parking_at_work_build.isna(),\"Case\"] = \"Workplace moved out of area\"\n", + "\n", + "df.Case.value_counts().sort_index()" ] }, { @@ -588,7 +594,7 @@ "metadata": {}, "outputs": [], "source": [ - "fig = vh.create_pie_chart(df.to_frame().sort_index(),[\"count\"])\n", + "fig = vh.create_pie_chart(df.Case.value_counts().to_frame().sort_index(),[\"count\"])\n", "fig.show()" ] }, @@ -607,7 +613,7 @@ "metadata": {}, "outputs": [], "source": [ - "usecols = ['person_id', 'is_worker', 'telecommute_frequency', 'workplace_zone_id']\n", + "usecols = ['person_id', 'is_worker', 'telecommute_frequency', 'workplace_zone_id', 'work_from_home']\n", "base_per = pd.read_csv(f\"{base_dir}/final_persons.csv\",\n", " index_col='person_id',\n", " usecols=usecols)\n", @@ -635,10 +641,12 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Filtering persons by workplace MAZ. Original Persons: {len(base_per)}\")\n", + "base_tele_mask = (base_per.is_worker) & ~(base_per.work_from_home)\n", + "build_tele_mask = (build_per.is_worker) & ~(build_per.work_from_home)\n", + "print(f\"Filtering workers by workplace MAZ who do not work from home. Original workers in base scenario: {len(base_per[base_tele_mask])}\")\n", "base_per = base_per[single_filter_mazs(base_per.workplace_zone_id)]\n", "build_per = build_per[single_filter_mazs(build_per.workplace_zone_id)]\n", - "print(f\"Persons after filtering: {len(base_per)}\")" + "print(f\"Workers in base scenario after filtering: {len(base_per[base_tele_mask])}\")" ] }, { @@ -648,8 +656,8 @@ "metadata": {}, "outputs": [], "source": [ - "df = base_per[base_per.is_worker].merge(build_per[build_per.is_worker],\n", - " how='left',\n", + "df = base_per[base_tele_mask].merge(build_per[build_tele_mask],\n", + " how='outer',\n", " left_index=True,\n", " right_index=True, \n", " suffixes=('_base','_build'))" @@ -663,14 +671,18 @@ "outputs": [], "source": [ "xtab = pd.crosstab(\n", - " df.telecommute_frequency_base,\n", - " df.telecommute_frequency_build,\n", + " df.telecommute_frequency_base.fillna(\"Workplace moved into area\"),\n", + " df.telecommute_frequency_build.fillna(\"Workplace moved out of area\"),\n", " rownames=['base'],\n", " colnames=['build'],\n", " margins=True,\n", - " margins_name='Total'\n", - ").sort_index()\n", - "xtab" + " margins_name='Total',\n", + " dropna=False,\n", + ")\n", + "xtab.loc[\n", + " filter(lambda x: x in xtab.index, ['0 (No Telecommute)','1 Day per Week','2-3 Days per Week','4 Days per Week','Workplace moved into area','Total']),\n", + " filter(lambda x: x in xtab.columns, ['0 (No Telecommute)','1 Day per Week','2-3 Days per Week','4 Days per Week','Workplace moved out of area','Total'])\n", + " ]" ] }, { @@ -688,6 +700,8 @@ "df1.loc[base_tc_magnitude == build_tc_magnitude, 'case'] = 'Unchanged'\n", "df1.loc[base_tc_magnitude > build_tc_magnitude, 'case'] = 'Decreased'\n", "df1.loc[base_tc_magnitude < build_tc_magnitude, 'case'] = 'Increased'\n", + "df1.loc[df1.telecommute_frequency_base.isna() & (~df1.telecommute_frequency_build.isna()),'case'] = \"Workplace moved into area\"\n", + "df1.loc[df1.telecommute_frequency_build.isna() & (~df1.telecommute_frequency_base.isna()),'case'] = \"Workplace moved out of area\"\n", "\n", "fig = vh.create_pie_chart(df1.case.value_counts().to_frame().sort_index(), [\"count\"])\n", "fig.show()" @@ -764,7 +778,7 @@ ], "metadata": { "kernelspec": { - "display_name": "asimviz", + "display_name": "asim_eet_viz", "language": "python", "name": "python3" }, diff --git a/notebooks/tours.ipynb b/notebooks/tours.ipynb index 56855ce..1324dfb 100644 --- a/notebooks/tours.ipynb +++ b/notebooks/tours.ipynb @@ -88,15 +88,16 @@ "metadata": {}, "outputs": [], "source": [ + "cols_to_use = ['tour_id','tour_category','origin','destination','primary_purpose','person_id','start','end','tour_type','tour_mode','parent_tour_id','stop_frequency']\n", "base_tour = pd.read_csv(f\"{base_dir}/final_tours.csv\", \n", " index_col='tour_id', \n", - " usecols=['tour_id','tour_category','origin','destination','primary_purpose','person_id','start','end','tour_type'])\n", + " usecols=cols_to_use)\n", "\n", "base_tour_idx = base_tour.index\n", "\n", "build_tour = pd.read_csv(f\"{build_dir}/final_tours.csv\", \n", " index_col='tour_id', \n", - " usecols=['tour_id','tour_category','origin','destination','primary_purpose','person_id','start','end','tour_type'])\n" + " usecols=cols_to_use)\n" ] }, { @@ -106,7 +107,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"\"\"Filtering tours by origin {\n", + "print(f\"\"\"Filtering all tours by origin {\n", " 'and' if how_method == 'all' else 'or' if how_method == 'any' else '???'\n", " } destination MAZ.\\nOriginal tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\\tdiff: {len(build_tour)-len(base_tour)}\"\"\")\n", "\n", @@ -121,7 +122,7 @@ " & multi_filter_mazs([build_tour.origin, build_tour.destination]) # and it's in this set\n", " )]\n", "\n", - "print(f\"After filtering, tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\\tdiff: {len(build_tour)-len(base_tour)}\")" + "print(f\"After filtering, total tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\\tdiff: {len(build_tour)-len(base_tour)}\")" ] }, { @@ -284,8 +285,6 @@ "source": [ "# keep only work and school tours\n", "base_tours_df = base_tour[base_tour.tour_type.isin(['work', 'school'])]\n", - "base_tour_idx = base_tours_df.index\n", - "\n", "build_tours_df = build_tour[build_tour.tour_type.isin(['work', 'school'])]" ] }, @@ -319,7 +318,7 @@ " left_index=True,\n", " right_index=True,\n", " suffixes=('_base', '_build'),\n", - " how='inner'\n", + " how='outer'\n", ")\n", "\n", "# get the difference in start and end times\n", @@ -372,14 +371,16 @@ " 3: \"PM\",\n", " 4: \"EV\"\n", "}\n", - "tp_order = ['EA','AM','MD','PM','EV','Total']\n", + "tp_order_base = ['EA','AM','MD','PM','EV','Newly created','Total']\n", + "tp_order_build = ['EA','AM','MD','PM','EV','Removed','Total']\n", + "\n", "xtab = pd.crosstab(\n", - " df.start_period_base.replace(mapper),\n", - " df.start_period_build.replace(mapper),\n", + " df.start_period_base.replace(mapper).fillna('Newly created'),\n", + " df.start_period_build.replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", ")\n", - "display(xtab.loc[tp_order,tp_order])" + "display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))" ] }, { @@ -402,16 +403,16 @@ "\n", "for purpose in ['work', 'school']:\n", " for metric in ['start']:\n", - " purpose_df = df[df.tour_type_base == purpose]\n", + " purpose_df = df[(df.tour_type_base == purpose) | (df.tour_type_build == purpose)]\n", " display(Markdown(f\"### {purpose.capitalize()}\"))\n", "\n", " xtab = pd.crosstab(\n", - " purpose_df[f'{metric}_period_base'].replace(mapper),\n", - " purpose_df[f'{metric}_period_build'].replace(mapper),\n", + " purpose_df[f'{metric}_period_base'].replace(mapper).fillna('Newly created'),\n", + " purpose_df[f'{metric}_period_build'].replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", " )\n", - " display(xtab.loc[tp_order,tp_order])\n", + " display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))\n", "\n", "\n", " summary_text = f\"\"\"**{purpose.capitalize()} tour {metric} changes:**\n", @@ -457,12 +458,12 @@ "outputs": [], "source": [ "xtab = pd.crosstab(\n", - " df.end_period_base.replace(mapper),\n", - " df.end_period_build.replace(mapper),\n", + " df.end_period_base.replace(mapper).fillna('Newly created'),\n", + " df.end_period_build.replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", ")\n", - "display(xtab.loc[tp_order,tp_order])" + "display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))" ] }, { @@ -485,16 +486,16 @@ "\n", "for purpose in ['work', 'school']:\n", " for metric in ['end']:\n", - " purpose_df = df[df.tour_type_base == purpose]\n", + " purpose_df = df[(df.tour_type_base == purpose) | (df.tour_type_build == purpose)]\n", " display(Markdown(f\"### {purpose.capitalize()}\"))\n", "\n", " xtab = pd.crosstab(\n", - " purpose_df[f'{metric}_period_base'].replace(mapper),\n", - " purpose_df[f'{metric}_period_build'].replace(mapper),\n", + " purpose_df[f'{metric}_period_base'].replace(mapper).fillna('Newly created'),\n", + " purpose_df[f'{metric}_period_build'].replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", " )\n", - " display(xtab.loc[tp_order,tp_order])\n", + " display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))\n", "\n", "\n", " summary_text = f\"\"\"**{purpose.capitalize()} tour {metric} changes:**\n", @@ -574,7 +575,7 @@ " left_index=True,\n", " right_index=True,\n", " suffixes=('_base', '_build'),\n", - " how='inner'\n", + " how='outer'\n", ")\n", "\n", "# get the difference in start and end times\n", @@ -621,12 +622,12 @@ "outputs": [], "source": [ "xtab = pd.crosstab(\n", - " df.start_period_base.replace(mapper),\n", - " df.start_period_build.replace(mapper),\n", + " df.start_period_base.replace(mapper).fillna('Newly created'),\n", + " df.start_period_build.replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", ")\n", - "display(xtab)" + "display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))" ] }, { @@ -652,18 +653,18 @@ "# above comment is needed for Quarto to render subtabs correctly\n", "metric='start'\n", "for key, value in name_dict.items():\n", - " purpose_df = df[df.primary_purpose_base == key]\n", + " purpose_df = df[(df.primary_purpose_base == key) | (df.primary_purpose_build == key)]\n", " if len(purpose_df) == 0:\n", " continue\n", " display(Markdown(f\"### {key.capitalize()}\"))\n", "\n", " xtab = pd.crosstab(\n", - " purpose_df[f'{metric}_period_base'].replace(mapper),\n", - " purpose_df[f'{metric}_period_build'].replace(mapper),\n", + " purpose_df[f'{metric}_period_base'].replace(mapper).fillna('Newly created'),\n", + " purpose_df[f'{metric}_period_build'].replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", " )\n", - " display(xtab)\n", + " display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))\n", "\n", "\n", "\n", @@ -705,12 +706,12 @@ "outputs": [], "source": [ "xtab = pd.crosstab(\n", - " df.end_period_base.replace(mapper),\n", - " df.end_period_build.replace(mapper),\n", + " df.end_period_base.replace(mapper).fillna('Newly created'),\n", + " df.end_period_build.replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", ")\n", - "display(xtab)" + "display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))" ] }, { @@ -736,18 +737,18 @@ "# above comment is needed for Quarto to render subtabs correctly\n", "metric='end'\n", "for key, value in name_dict.items():\n", - " purpose_df = df[df.primary_purpose_base == key]\n", + " purpose_df = df[(df.primary_purpose_base == key) | (df.primary_purpose_build == key)]\n", " if len(purpose_df) == 0:\n", " continue\n", " display(Markdown(f\"### {key.capitalize()}\"))\n", "\n", " xtab = pd.crosstab(\n", - " purpose_df[f'{metric}_period_base'].replace(mapper),\n", - " purpose_df[f'{metric}_period_build'].replace(mapper),\n", + " purpose_df[f'{metric}_period_base'].replace(mapper).fillna('Newly created'),\n", + " purpose_df[f'{metric}_period_build'].replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", " )\n", - " display(xtab)\n", + " display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))\n", "\n", " summary_text = f\"\"\"**{key.capitalize()} tour {metric} changes:**\n", " Tours which arrived earlier in build: {len(purpose_df[purpose_df[f'{metric}_bin_difference'] < 0])}\n", @@ -791,48 +792,6 @@ "# xtab base-vs build mode choice for NM tours by purpose" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "5752ee4a-e49d-4842-876e-435937812ab5", - "metadata": {}, - "outputs": [], - "source": [ - "usecols = ['tour_id','tour_category','origin','destination','tour_mode', 'tour_category', 'primary_purpose']\n", - "base_tour = pd.read_csv(f\"{base_dir}/final_tours.csv\", \n", - " index_col='tour_id', \n", - " usecols=usecols)\n", - "\n", - "build_tour = pd.read_csv(f\"{build_dir}/final_tours.csv\", \n", - " index_col='tour_id',\n", - " usecols=usecols)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9f592846-8521-479e-9435-e30edd401a12", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\"\"Filtering tours by origin {\n", - " 'and' if how_method == 'all' else 'or' if how_method == 'any' else '???'\n", - " } destination MAZ.\\nOriginal tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\"\"\")\n", - "\n", - "base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])] # base tour in the filtered set\n", - "\n", - "# special build-case filtering\n", - "build_tour = build_tour[(\n", - " build_tour.index.isin(base_tour_idx) # originally existed in base\n", - " & build_tour.index.isin(base_tour.index) # and was in the current set for the base\n", - " ) | # OR\n", - " ((~build_tour.index.isin(base_tour_idx)) # is a new tour \n", - " & multi_filter_mazs([build_tour.origin, build_tour.destination]) # and it's in this set\n", - " )]\n", - "\n", - "print(f\"After filtering, tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\")" - ] - }, { "cell_type": "code", "execution_count": null, @@ -901,10 +860,14 @@ " \"TNC_SINGLE\": \"RIDESHARE\",\n", "\n", "}\n", - "order = ['DRIVEALONE','SHARED2','SHARED3',\n", + "mode_order_base = ['DRIVEALONE','SHARED2','SHARED3',\n", " 'WALK','BIKE','MICROMOBILITY','WALK_TRANSIT',\n", " 'PNR_TRANSIT','KNR_TRANSIT','TNC_TRANSIT',\n", - " 'RIDESHARE','SCH_BUS','Newly created','Removed',\"Total\",]\n" + " 'RIDESHARE','SCH_BUS','Newly created',\"Total\",]\n", + "mode_order_build = ['DRIVEALONE','SHARED2','SHARED3',\n", + " 'WALK','BIKE','MICROMOBILITY','WALK_TRANSIT',\n", + " 'PNR_TRANSIT','KNR_TRANSIT','TNC_TRANSIT',\n", + " 'RIDESHARE','SCH_BUS','Removed',\"Total\",]\n" ] }, { @@ -914,16 +877,13 @@ "metadata": {}, "outputs": [], "source": [ - "# xtab = df[[\"tour_mode_base\", \"tour_mode_build\"]].replace(combiners).value_counts(dropna=False).unstack().fillna(0)\n", - "# xtab.loc[order,order]\n", - "order = pd.Series(order)\n", "xtab = pd.crosstab(\n", " df.tour_mode_base.replace(combiners).fillna('Newly created'),\n", " df.tour_mode_build.replace(combiners).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total')\n", "\n", - "display(xtab.loc[order[order.isin(xtab.index)],order[order.isin(xtab.columns)]])" + "display(xtab.reindex(index=mode_order_base,columns=mode_order_build,fill_value=0))" ] }, { @@ -1052,7 +1012,7 @@ "# only trips that exist in the base run will be output\n", "# looping thru name_dict will not find cases where the base value for purpose or category is nan in the outer join\n", "for key, value in name_dict.items():\n", - " df_purp = df.loc[(df.primary_purpose_base == key)]\n", + " df_purp = df.loc[(df.primary_purpose_base == key)|(df.primary_purpose_build == key)]\n", " if len(df_purp) == 0:\n", " continue \n", " \n", @@ -1074,6 +1034,142 @@ ":::" ] }, + { + "cell_type": "markdown", + "id": "b24b7dfb", + "metadata": {}, + "source": [ + "## Stop Frequency" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad15ecd1", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.merge(\n", + " base_tour,\n", + " build_tour,\n", + " how='outer',\n", + " left_index=True,\n", + " right_index=True,\n", + " suffixes=['_base', '_build']\n", + ")\n", + "\n", + "df.loc[df.stop_frequency_base == df.stop_frequency_build,'Case'] = \"Unchanged\"\n", + "df.loc[df.stop_frequency_base != df.stop_frequency_build,'Case'] = \"Changed\"\n", + "\n", + "df.loc[df.stop_frequency_base.isna(),'Case'] = \"Newly Created\"\n", + "df.loc[df.stop_frequency_build.isna(),\"Case\"] = \"Removed\"\n", + "\n", + "df.Case.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "791905f1", + "metadata": {}, + "outputs": [], + "source": [ + "total_stops_base = (df.stop_frequency_base.str[0].astype(float) \n", + " + df.stop_frequency_base.str[5].astype(float)\n", + " ).fillna(-np.inf).clip(upper=4)\n", + "\n", + "total_stops_build = (df.stop_frequency_build.str[0].astype(float) \n", + " + df.stop_frequency_build.str[5].astype(float)\n", + " ).fillna(-np.inf).clip(upper=4)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0766c89", + "metadata": {}, + "outputs": [], + "source": [ + "fig = vh.create_pie_chart(df.Case.value_counts().to_frame().sort_index(), [\"count\"])\n", + "fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "359f52ef", + "metadata": {}, + "outputs": [], + "source": [ + "diff = total_stops_build - total_stops_base\n", + "xtab = pd.crosstab(\n", + " total_stops_base.replace({4:\"4+\",-np.inf:\"Newly created\"}).astype(str).str.replace(\".0\",\"\"),\n", + " total_stops_build.clip(upper=4).replace({4:\"4+\",-np.inf:\"Removed\"}).astype(str).str.replace(\".0\",\"\"),\n", + " margins=True,\n", + " margins_name='Total')\n", + "\n", + "display(xtab.sort_index())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "331e7142", + "metadata": {}, + "outputs": [], + "source": [ + "summary_text = f\"\"\"**Stop frequency changes:**\n", + " Tours with fewer stops in build: {len(diff[diff < 0])}\n", + " Tours with the same stops in build: {len(diff[diff==0])}\n", + " Tours with more stops in build: {len(diff[diff > 0])}\n", + "\"\"\"\n", + "display(Markdown(summary_text.replace(\"\\n\",\"
\")))" + ] + }, + { + "cell_type": "raw", + "id": "21679760", + "metadata": {}, + "source": [ + "::: {.panel-tabset}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c04587c3", + "metadata": {}, + "outputs": [], + "source": [ + "# | output: asis\n", + "# above comment is needed for Quarto to render subtabs correctly\n", + "\n", + "# only trips that exist in the base run will be output\n", + "# looping thru name_dict will not find cases where the base value for purpose or category is nan in the outer join\n", + "for key, value in name_dict.items():\n", + " df_purp = df.loc[(df.primary_purpose_base == key) | (df.primary_purpose_build == key)]\n", + " if len(df_purp) == 0:\n", + " continue \n", + " \n", + " print(f\"#### {value}\")\n", + " df_purp_cases = df_purp.Case.value_counts()\n", + " print(str(df_purp_cases).replace(\"\\n\",\"
\"))\n", + " \n", + " fig = vh.create_pie_chart(df_purp_cases.to_frame().sort_index(),[\"count\"])\n", + " \n", + " fig.show()\n", + " display(Markdown(\" \"))" + ] + }, + { + "cell_type": "raw", + "id": "1a466645", + "metadata": {}, + "source": [ + ":::" + ] + }, { "cell_type": "markdown", "id": "c1a39b2b", @@ -1095,11 +1191,11 @@ "\n", "base_tour = pd.read_csv(f\"{base_dir}/final_tours.csv\", \n", " index_col='tour_id', \n", - " usecols=['tour_id','tour_category','primary_purpose','destination', 'parent_tour_id'])\n", + " usecols=['tour_id','tour_category','primary_purpose','origin','destination', 'parent_tour_id'])\n", "\n", "build_tour = pd.read_csv(f\"{build_dir}/final_tours.csv\", \n", " index_col='tour_id', \n", - " usecols=['tour_id','tour_category','primary_purpose','destination', 'parent_tour_id'])\n", + " usecols=['tour_id','tour_category','primary_purpose','origin','destination', 'parent_tour_id'])\n", "\n", "# keep work tours only\n", "base_work_tour = base_tour[base_tour.primary_purpose == 'work']\n", @@ -1114,9 +1210,11 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"\"\"Filtering work tours by destination MAZ.\\nOriginal tours in base: {len(base_work_tour)}\\tbuild: {len(build_work_tour)}\\tdiff: {len(build_work_tour)-len(base_work_tour)}\"\"\")\n", + "print(f\"\"\"Filtering work tours by origin {\n", + " 'and' if how_method == 'all' else 'or' if how_method == 'any' else '???'\n", + " } destination MAZ.\\nOriginal work tours in base: {len(base_work_tour)}\\tbuild: {len(build_work_tour)}\\tdiff: {len(build_work_tour)-len(base_work_tour)}\"\"\")\n", "\n", - "base_work_tour = base_work_tour[single_filter_mazs(base_work_tour.destination)] # base tour in the filtered set\n", + "base_work_tour = base_work_tour[multi_filter_mazs([base_work_tour.origin, base_work_tour.destination])] # base tour in the filtered set\n", "\n", "# special build-case filtering\n", "build_work_tour = build_work_tour[(\n", @@ -1124,10 +1222,10 @@ " & build_work_tour.index.isin(base_work_tour.index) # and was in the current set for the base\n", " ) | # OR\n", " ((~build_work_tour.index.isin(base_work_tour_idx)) # is a new tour \n", - " & single_filter_mazs(build_work_tour.destination)# and it's in this set\n", + " & multi_filter_mazs([build_work_tour.origin, build_work_tour.destination]) # and it's in this set\n", " )]\n", "\n", - "print(f\"After filtering, tours in base: {len(base_work_tour)}\\tbuild: {len(build_work_tour)}\\tdiff: {len(build_work_tour)-len(base_work_tour)}\")" + "print(f\"After filtering, work tours in base: {len(base_work_tour)}\\tbuild: {len(build_work_tour)}\\tdiff: {len(build_work_tour)-len(base_work_tour)}\")" ] }, { @@ -1192,11 +1290,11 @@ "source": [ "base_tour = pd.read_csv(f\"{base_dir}/final_tours.csv\", \n", " index_col='tour_id', \n", - " usecols=['tour_id','tour_category','origin','destination','primary_purpose','start','end'])\n", + " usecols=['tour_id','tour_category','origin','destination','primary_purpose','start','end','tour_mode'])\n", "\n", "build_tour = pd.read_csv(f\"{build_dir}/final_tours.csv\", \n", " index_col='tour_id', \n", - " usecols=['tour_id','tour_category','origin','destination','primary_purpose','start','end'])\n" + " usecols=['tour_id','tour_category','origin','destination','primary_purpose','start','end','tour_mode'])\n" ] }, { @@ -1206,9 +1304,16 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"\"\"Filtering tours by origin {\n", + "print(f\"\"\"Filtering at-work subtours by origin {\n", " 'and' if how_method == 'all' else 'or' if how_method == 'any' else '???'\n", - " } destination MAZ.\\nOriginal tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\\tdiff: {len(build_tour)-len(base_tour)}\"\"\")\n", + " } destination MAZ.\\nOriginal subtours in base: {\n", + " len(base_tour[base_tour.tour_category == 'atwork'])\n", + " }\\tbuild: {\n", + " len(build_tour[build_tour.tour_category == 'atwork'])\n", + " }\\tdiff: {\n", + " len(build_tour[build_tour.tour_category == 'atwork'])\n", + " -len(base_tour[base_tour.tour_category == 'atwork'])\n", + " }\"\"\")\n", "\n", "base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])] # base tour in the filtered set\n", "\n", @@ -1221,7 +1326,11 @@ " & multi_filter_mazs([build_tour.origin, build_tour.destination]) # and it's in this set\n", " )]\n", "\n", - "print(f\"After filtering, tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\")" + "print(f\"\"\"After filtering, at-work subtours in base: {\n", + " len(base_tour[base_tour.tour_category == 'atwork'])\n", + " }\\tbuild: {\n", + " len(build_tour[build_tour.tour_category == 'atwork'])\n", + " }\"\"\")" ] }, { @@ -1283,51 +1392,6 @@ "## At-Work Subtour Scheduling" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "ee088c27", - "metadata": {}, - "outputs": [], - "source": [ - "base_tour = pd.read_csv(f\"{base_dir}/final_tours.csv\", \n", - " index_col='tour_id', \n", - " usecols=['tour_id','tour_category','origin','destination','primary_purpose','start','end'])\n", - "base_tour = base_tour[base_tour.tour_category == 'atwork']\n", - "\n", - "base_tour_idx = base_tour.index\n", - "\n", - "build_tour = pd.read_csv(f\"{build_dir}/final_tours.csv\", \n", - " index_col='tour_id', \n", - " usecols=['tour_id','tour_category','origin','destination','primary_purpose','start','end'])\n", - "build_tour = build_tour[build_tour.tour_category == 'atwork']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8ab8310", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\"\"Filtering tours by origin {\n", - " 'and' if how_method == 'all' else 'or' if how_method == 'any' else '???'\n", - " } destination MAZ.\\nOriginal tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\\tdiff: {len(build_tour)-len(base_tour)}\"\"\")\n", - "\n", - "base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])] # base tour in the filtered set\n", - "\n", - "# special build-case filtering\n", - "build_tour = build_tour[(\n", - " build_tour.index.isin(base_tour_idx) # originally existed in base\n", - " & build_tour.index.isin(base_tour.index) # and was in the current set for the base\n", - " ) | # OR\n", - " ((~build_tour.index.isin(base_tour_idx)) # is a new tour \n", - " & multi_filter_mazs([build_tour.origin, build_tour.destination]) # and it's in this set\n", - " )]\n", - "\n", - "print(f\"After filtering, tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\")" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1356,12 +1420,12 @@ "# join base and build tours on tour_id\n", "# compare tours that are in both base and build datasets\n", "df = pd.merge(\n", - " base_tour,\n", - " build_tour,\n", + " base_tour[base_tour.tour_category == 'atwork'],\n", + " build_tour[build_tour.tour_category == 'atwork'],\n", " left_index=True,\n", " right_index=True,\n", " suffixes=('_base', '_build'),\n", - " how='inner'\n", + " how='outer'\n", ")\n", "\n", "# get the difference in start and end times\n", @@ -1389,12 +1453,12 @@ "outputs": [], "source": [ "xtab = pd.crosstab(\n", - " df.start_period_base.replace(mapper),\n", - " df.start_period_build.replace(mapper),\n", + " df.start_period_base.replace(mapper).fillna('Newly created'),\n", + " df.start_period_build.replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", ")\n", - "display(xtab.loc[tp_order,tp_order])" + "display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))" ] }, { @@ -1413,12 +1477,12 @@ "outputs": [], "source": [ "xtab = pd.crosstab(\n", - " df.end_period_base.replace(mapper),\n", - " df.end_period_build.replace(mapper),\n", + " df.end_period_base.replace(mapper).fillna('Newly created'),\n", + " df.end_period_build.replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", ")\n", - "display(xtab.loc[tp_order,tp_order])" + "display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))" ] }, { @@ -1448,47 +1512,6 @@ "## At-Work Subtour Mode Choice" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0aa09d92-6245-4a8c-bbab-ec78cc62c4bf", - "metadata": {}, - "outputs": [], - "source": [ - "usecols = ['tour_id','tour_category','origin','destination','tour_category', 'tour_mode']\n", - "base_tour = pd.read_csv(f\"{base_dir}/final_tours.csv\", \n", - " index_col='tour_id', \n", - " usecols=usecols)\n", - "\n", - "build_tour = pd.read_csv(f\"{build_dir}/final_tours.csv\", \n", - " index_col='tour_id', \n", - " usecols=usecols)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c4fd7cb3-1920-4e62-b572-7325dee94ecd", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\"\"Filtering tours by origin {\n", - " 'and' if how_method == 'all' else 'or' if how_method == 'any' else '???'\n", - " } destination MAZ.\\nOriginal tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\"\"\")\n", - "base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])] # base tour in the filtered set\n", - "\n", - "# special build-case filtering\n", - "build_tour = build_tour[(\n", - " build_tour.index.isin(base_tour_idx) # originally existed in base\n", - " & build_tour.index.isin(base_tour.index) # and was in the current set for the base\n", - " ) | # OR\n", - " ((~build_tour.index.isin(base_tour_idx)) # is a new tour \n", - " & multi_filter_mazs([build_tour.origin, build_tour.destination]) # and it's in this set\n", - " )]\n", - "\n", - "print(f\"After filtering, tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\")" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1532,16 +1555,14 @@ "metadata": {}, "outputs": [], "source": [ - "# xtab = df[[\"tour_mode_base\", \"tour_mode_build\"]].replace(combiners).value_counts(dropna=False).unstack().fillna(0)\n", - "# xtab.loc[order,order]\n", - "order = pd.Series(order)\n", + "\n", "xtab = pd.crosstab(\n", " df.tour_mode_base.replace(combiners).fillna('Newly created'),\n", " df.tour_mode_build.replace(combiners).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total')\n", "\n", - "display(xtab.loc[order[order.isin(xtab.index)],order[order.isin(xtab.columns)]])" + "display(xtab.reindex(index=mode_order_base,columns=mode_order_build,fill_value=0))" ] }, { @@ -1567,184 +1588,6 @@ "\"\"\"\n", "display(Markdown(summary_text.replace(\"\\n\",\"
\")))" ] - }, - { - "cell_type": "markdown", - "id": "ac961eb2", - "metadata": {}, - "source": [ - "## Stop Frequency" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5eb7a6cd-6212-4503-9509-105802cff771", - "metadata": {}, - "outputs": [], - "source": [ - "usecols = ['tour_id','tour_category','origin','destination','stop_frequency', 'primary_purpose']\n", - "base_tour = pd.read_csv(f\"{base_dir}/final_tours.csv\", \n", - " index_col='tour_id', \n", - " usecols=usecols)\n", - "\n", - "build_tour = pd.read_csv(f\"{build_dir}/final_tours.csv\", \n", - " index_col='tour_id',\n", - " usecols=usecols)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f5698e6b-9d80-4398-90ae-21b55e3b049b", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\"\"Filtering tours by origin {\n", - " 'and' if how_method == 'all' else 'or' if how_method == 'any' else '???'\n", - " } destination MAZ.\\nOriginal tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\"\"\")\n", - "\n", - "base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])] # base tour in the filtered set\n", - "\n", - "# special build-case filtering\n", - "build_tour = build_tour[(\n", - " build_tour.index.isin(base_tour_idx) # originally existed in base\n", - " & build_tour.index.isin(base_tour.index) # and was in the current set for the base\n", - " ) | # OR\n", - " ((~build_tour.index.isin(base_tour_idx)) # is a new tour \n", - " & multi_filter_mazs([build_tour.origin, build_tour.destination]) # and it's in this set\n", - " )]\n", - "\n", - "print(f\"After filtering, tours in base: {len(base_tour)}\\tbuild: {len(build_tour)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "317ed485-59ed-469d-9ba0-d6480e61c7e0", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.merge(\n", - " base_tour,\n", - " build_tour,\n", - " how='outer',\n", - " left_index=True,\n", - " right_index=True,\n", - " suffixes=['_base', '_build']\n", - ")\n", - "\n", - "df.loc[df.stop_frequency_base == df.stop_frequency_build,'Case'] = \"Unchanged\"\n", - "df.loc[df.stop_frequency_base != df.stop_frequency_build,'Case'] = \"Changed\"\n", - "\n", - "df.loc[df.stop_frequency_base.isna(),'Case'] = \"Newly Created\"\n", - "df.loc[df.stop_frequency_build.isna(),\"Case\"] = \"Removed\"\n", - "\n", - "df.Case.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "38df880b", - "metadata": {}, - "outputs": [], - "source": [ - "total_stops_base = (df.stop_frequency_base.str[0].astype(float) \n", - " + df.stop_frequency_base.str[5].astype(float)\n", - " ).fillna(-np.inf).clip(upper=4)\n", - "\n", - "total_stops_build = (df.stop_frequency_build.str[0].astype(float) \n", - " + df.stop_frequency_build.str[5].astype(float)\n", - " ).fillna(-np.inf).clip(upper=4)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9453567-16cd-47f1-b123-96ccaf0cd2ee", - "metadata": {}, - "outputs": [], - "source": [ - "fig = vh.create_pie_chart(df.Case.value_counts().to_frame().sort_index(), [\"count\"])\n", - "fig" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db5a41d1-123e-43b1-b5c9-7f32009c2dbe", - "metadata": {}, - "outputs": [], - "source": [ - "diff = total_stops_build - total_stops_base\n", - "xtab = pd.crosstab(\n", - " total_stops_base.replace({4:\"4+\",-np.inf:\"Newly created\"}).astype(str).str.replace(\".0\",\"\"),\n", - " total_stops_build.clip(upper=4).replace({4:\"4+\",-np.inf:\"Removed\"}).astype(str).str.replace(\".0\",\"\"),\n", - " margins=True,\n", - " margins_name='Total')\n", - "\n", - "display(xtab.sort_index())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f6eb5e6", - "metadata": {}, - "outputs": [], - "source": [ - "summary_text = f\"\"\"**Stop frequency changes:**\n", - " Tours with fewer stops in build: {len(diff[diff < 0])}\n", - " Tours with the same stops in build: {len(diff[diff==0])}\n", - " Tours with more stops in build: {len(diff[diff > 0])}\n", - "\"\"\"\n", - "display(Markdown(summary_text.replace(\"\\n\",\"
\")))" - ] - }, - { - "cell_type": "raw", - "id": "4115c2e0-dc14-410f-8716-457c78db6e70", - "metadata": {}, - "source": [ - "::: {.panel-tabset}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "536eca88-0f30-4fa3-a463-802fa3da2fb0", - "metadata": {}, - "outputs": [], - "source": [ - "# | output: asis\n", - "# above comment is needed for Quarto to render subtabs correctly\n", - "\n", - "# only trips that exist in the base run will be output\n", - "# looping thru name_dict will not find cases where the base value for purpose or category is nan in the outer join\n", - "for key, value in name_dict.items():\n", - " df_purp = df.loc[df.primary_purpose_base == key]\n", - " if len(df_purp) == 0:\n", - " continue \n", - " \n", - " print(f\"#### {value}\")\n", - " df_purp_cases = df_purp.Case.value_counts()\n", - " print(str(df_purp_cases).replace(\"\\n\",\"
\"))\n", - " \n", - " fig = vh.create_pie_chart(df_purp_cases.to_frame().sort_index(),[\"count\"])\n", - " \n", - " fig.show()\n", - " display(Markdown(\" \"))" - ] - }, - { - "cell_type": "raw", - "id": "1da243b8-b557-427f-82a0-3928c653f0bb", - "metadata": {}, - "source": [ - ":::" - ] } ], "metadata": { diff --git a/notebooks/trips.ipynb b/notebooks/trips.ipynb index 4e5a9ed..2eef78e 100644 --- a/notebooks/trips.ipynb +++ b/notebooks/trips.ipynb @@ -103,7 +103,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"\"\"Filtering trips by origin {\n", + "print(f\"\"\"Filtering all trips by origin {\n", " 'and' if how_method == 'all' else 'or' if how_method == 'any' else '???'\n", " } destination MAZ.\\nOriginal trips in base: {len(base_trip)}\\tbuild: {len(build_trip)}\\tdiff: {len(build_trip)-len(base_trip)}\"\"\")\n", "\n", @@ -120,7 +120,7 @@ " \n", " ]\n", "\n", - "print(f\"After filtering, trips in base: {len(base_trip)}\\tbuild: {len(build_trip)}\\tdiff: {len(build_trip)-len(base_trip)}\")" + "print(f\"After filtering, total trips in base: {len(base_trip)}\\tbuild: {len(build_trip)}\\tdiff: {len(build_trip)-len(base_trip)}\")" ] }, { @@ -128,7 +128,7 @@ "id": "265516ee", "metadata": {}, "source": [ - "## Trip Purpose" + "## Intermediate Stop Purpose" ] }, { @@ -192,7 +192,7 @@ "id": "6a7d1649", "metadata": {}, "source": [ - "## Trip Destination" + "## Intermediate Stop Destination" ] }, { @@ -269,7 +269,7 @@ "# compare trips that are in both base and build\n", "df = base_trip.merge(\n", " build_trip, \n", - " how='inner', \n", + " how='outer', \n", " left_index=True, \n", " right_index=True,\n", " suffixes=('_base', '_build')\n", @@ -295,18 +295,19 @@ " 3: \"PM\",\n", " 4: \"EV\"\n", "}\n", - "tp_order = ['EA','AM','MD','PM','EV','Total']\n", + "tp_order_base = ['EA','AM','MD','PM','EV','Newly created','Total']\n", + "tp_order_build = ['EA','AM','MD','PM','EV','Removed','Total']\n", "\n", "metric='depart'\n", "purpose_df = df\n", - "display(Markdown(f\"### Trip departure changes\"))\n", + "display(Markdown(f\"### Trip departure changes for all trips\"))\n", "xtab = pd.crosstab(\n", - " purpose_df[f'{metric}_period_base'].replace(mapper),\n", - " purpose_df[f'{metric}_period_build'].replace(mapper),\n", + " purpose_df[f'{metric}_period_base'].replace(mapper).fillna('Newly created'),\n", + " purpose_df[f'{metric}_period_build'].replace(mapper).fillna('Removed'),\n", " margins=True,\n", " margins_name='Total'\n", ")\n", - "display(xtab.loc[tp_order,tp_order])\n", + "display(xtab.reindex(index=tp_order_base,columns=tp_order_build,fill_value=0))\n", "\n", "summary_text = f\"\"\"**Trip departure changes:**\n", " Trips which departed earlier in build: {len(purpose_df[purpose_df[f'{metric}_bin_difference'] < 0])}\n", @@ -429,7 +430,7 @@ "metadata": {}, "outputs": [], "source": [ - "summary_text = f\"\"\"**Trip mode changes:**\n", + "summary_text = f\"\"\"**Trip mode changes for all trips:**\n", "To transit\n", "

\n", "from auto: {len(df[df.trip_mode_base.isin(auto_modes) & df.trip_mode_build.isin(transit_modes)])}\n", diff --git a/visualizer_helpers.py b/visualizer_helpers.py index a968f69..d246a87 100644 --- a/visualizer_helpers.py +++ b/visualizer_helpers.py @@ -34,6 +34,8 @@ "Removed": "Cherry_light", "Increased": "Leaf", "Decreased": "Cherry", + "Workplace moved into area": "Leaf_light", + "Workplace moved out of area": "Cherry_light", } def create_bar_chart( @@ -252,7 +254,7 @@ def unaffected_multi(xs: pd.Series) -> pd.Series: def get_time_period_index( bin: int, - time_period_mapping: Optional[list] = [0,12,22,32,40,48], + time_period_mapping: Optional[list] = [0,6,12,25,32,48], ) -> int: """ Convert a bin number to a time period index.