diff --git a/bus_procurement_cost/FTA_bus_grant_analysis.ipynb b/bus_procurement_cost/FTA_bus_grant_analysis.ipynb index 8fd502843..7ff562987 100644 --- a/bus_procurement_cost/FTA_bus_grant_analysis.ipynb +++ b/bus_procurement_cost/FTA_bus_grant_analysis.ipynb @@ -33,7 +33,7 @@ }, "outputs": [], "source": [ - "# import shared_utils\n", + "import shared_utils\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", @@ -74,250 +74,106 @@ }, "source": [ "## Data Cleaning\n", - "1. snake-case column name\n", - "2. currency format funcding column (with $ and , )\n", + "1. snake-case column names\n", + "2. remove currency formatting from funding column (with $ and , )\n", "3. seperate text from # of bus col (split at '(')\n", " a. trim spaces in new col\n", " b. get rid of () characters in new col\n", - "4. trim spaces in other columns?" + "4. trim spaces in other columns\n", + "5. examine column values and replace/update as needed\n", + "6. create new columns for bus size type and prop type\n" + ] + }, + { + "cell_type": "markdown", + "id": "cb70936a-8d54-4ae5-b9cc-ef64ea04c8b5", + "metadata": { + "tags": [] + }, + "source": [ + "### Dataframe cleaning" ] }, { "cell_type": "code", "execution_count": 3, - "id": "797069c2-1937-4b11-bc6f-f1e5eb39534a", + "id": "7efb6ebf-474a-4c58-8052-a7427e881649", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['state', 'project_sponsor', 'project_title', 'description', 'funding',\n", - " '#_of_buses', 'project_type', 'propulsion_type', 'area_served',\n", - " 'congressional_districts', 'fta_region', 'bus/low-no_program'],\n", - " dtype='object')" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# snake case columns names via list\n", - "new_col = [\n", - " \"state\",\n", - " \"project_sponsor\",\n", - " \"project_title\",\n", - " \"description\",\n", - " \"funding\",\n", - " \"#_of_buses\",\n", - " 
\"project_type\",\n", - " \"propulsion_type\",\n", - " \"area_served\",\n", - " \"congressional_districts\",\n", - " \"fta_region\",\n", - " \"bus/low-no_program\",\n", - "]\n", + "# functions to clean up dataframe and df columns (both edit df in place and return None)\n", + "def snake_case(df): # column labels -> trimmed, lower case, underscores instead of spaces\n", + " df.columns=df.columns.str.strip() # trim first so edge spaces are dropped, not turned into underscores\n", + " df.columns=df.columns.str.lower()\n", + " df.columns=df.columns.str.replace(\" \", \"_\", regex=False)\n", "\n", - "df.columns = new_col\n", - "df.columns" + "def fund_cleaner(df,column): # strip currency formatting from a text column; values stay strings\n", + " df[column]= df[column].str.replace(\"$\", \"\", regex=False) # literal match; no regex-default FutureWarning\n", + " df[column]= df[column].str.replace(\",\", \"\", regex=False)\n", + " df[column]= df[column].str.strip()\n", + "\n" ] }, { "cell_type": "code", "execution_count": 4, - "id": "03193a06-1851-4c51-bd45-5576ff66993c", + "id": "f55008ae-1de5-4cd3-89f2-87cd4eac17cd", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dtype('O')" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "$5,000,000 3\n", - "$6,000,000 2\n", - "$3,400,000 2\n", - "$104,000,000 1\n", - "$4,313,552 1\n", - "$3,133,129 1\n", - "$3,187,200 1\n", - "$3,199,038 1\n", - "$3,248,500 1\n", - "$3,303,600 1\n", - "$3,326,067 1\n", - "$3,609,800 1\n", - "$3,645,000 1\n", - "$3,937,500 1\n", - "$4,094,652 1\n", - "$4,278,772 1\n", - "$4,500,000 1\n", - "$4,492,904 1\n", - "$2,860,250 1\n", - "$4,690,010 1\n", - "$4,738,886 1\n", - "$5,001,700 1\n", - "$5,750,351 1\n", - "$5,883,200 1\n", - "$5,945,553 1\n", - "$6,197,180 1\n", - "$6,341,306 1\n", - "$6,407,460 1\n", - "$6,424,808 1\n", - "$6,455,325 1\n", - "$2,932,500 1\n", - "$2,819,460 1\n", - "$103,000,000 1\n", - "$1,080,000 1\n", - "$233,760 1\n", - "$280,800 1\n", - "$300,000 1\n", - "$320,000 1\n", - "$514,002 1\n", - "$653,184 1\n", - "$723,171 1\n", - "$753,118 1\n", - "$776,714 1\n", - "$945,178 1\n", - "$1,006,750 1\n", - "$1,010,372 1\n", - "$1,055,365 1\n", - "$1,145,951 1\n", - "$2,359,072 1\n", - "$1,162,000 1\n", - "$1,200,000 1\n", - "$1,276,628 1\n", - 
"$1,280,000 1\n", - "$1,456,970 1\n", - "$1,506,618 1\n", - "$1,672,000 1\n", - "$1,760,000 1\n", - "$2,063,160 1\n", - "$2,160,000 1\n", - "$2,162,886 1\n", - "$2,207,758 1\n", - "$2,212,747 1\n", - "$6,586,104 1\n", - "$6,635,394 1\n", - "$6,859,296 1\n", - "$28,947,368 1\n", - "$19,040,336 1\n", - "$20,000,000 1\n", - "$20,370,793 1\n", - "$20,381,950 1\n", - "$21,490,560 1\n", - "$22,286,745 1\n", - "$22,469,312 1\n", - "$23,280,546 1\n", - "$23,984,700 1\n", - "$25,000,000 1\n", - "$25,513,684 1\n", - "$25,906,730 1\n", - "$26,437,120 1\n", - "$29,330,243 1\n", - "$7,305,526 1\n", - "$29,331,665 1\n", - "$30,128,378 1\n", - "$30,890,413 1\n", - "$31,535,000 1\n", - "$33,552,634 1\n", - "$37,808,113 1\n", - "$37,962,840 1\n", - "$39,142,124 1\n", - "$39,863,156 1\n", - "$40,402,548 1\n", - "$47,000,000 1\n", - "$71,439,261 1\n", - "$80,000,000 1\n", - "$18,262,255 1\n", - "$17,853,710 1\n", - "$17,532,900 1\n", - "$17,055,353 1\n", - "$7,393,183 1\n", - "$7,440,000 1\n", - "$7,443,765 1\n", - "$7,526,400 1\n", - "$7,598,425 1\n", - "$7,852,320 1\n", - "$8,122,850 1\n", - "$8,740,728 1\n", - "$8,784,606 1\n", - "$9,644,865 1\n", - "$9,650,646 1\n", - "$9,806,428 1\n", - "$9,899,120 1\n", - "$10,000,000 1\n", - "$10,388,000 1\n", - "$10,694,736 1\n", - "$10,700,000 1\n", - "$11,560,000 1\n", - "$11,570,906 1\n", - "$12,299,377 1\n", - "$12,458,500 1\n", - "$12,600,000 1\n", - "$13,295,699 1\n", - "$13,880,910 1\n", - "$15,423,904 1\n", - "$16,166,822 1\n", - "$16,358,000 1\n", - "$181,250 1\n", - "Name: funding, dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], + "source": [ + "#snake case function to Df\n", + "snake_case(df)" + ] + }, + { + "cell_type": "markdown", + "id": "d012306e-86ff-4351-86b3-c3a8dc3145fd", + "metadata": { + "tags": [] + }, + "source": [ + "### Column Cleaning" + ] + }, + { + "cell_type": "markdown", + "id": "6fb7a5e0-6649-468b-9f0e-4b36281e0db0", + "metadata": { + "tags": [] + }, "source": [ 
- "# checking data type of funding col\n", - "# checking to see if any values are not numbers\n", - "# will need to clean up this col\n", - "display(df[\"funding\"].dtype, df.funding.value_counts())" + "#### rename propulsion_type to propulsion_category" ] }, { "cell_type": "code", "execution_count": 5, - "id": "7efb6ebf-474a-4c58-8052-a7427e881649", + "id": "9e22e6fa-857d-44c3-beec-0f83f71a6b1c", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_673/2798189375.py:2: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", - " df['funding']=df['funding'].str.replace('$','')\n" - ] - } - ], + "outputs": [], "source": [ - "# clean up funding column. removing $ and , and making column as int64\n", - "df[\"funding\"] = df[\"funding\"].str.replace(\"$\", \"\")\n", - "df[\"funding\"] = df[\"funding\"].str.replace(\",\", \"\")\n", - "df[\"funding\"] = df[\"funding\"].astype(\"int64\")" + "#rename col to propulsion category\n", + "df=df.rename(columns={'propulsion_type':'propulsion_category'})" ] }, { "cell_type": "code", "execution_count": 6, - "id": "2316c492-d1c5-43fd-abb8-d9a0c510f49e", + "id": "6d7d9992-fc2c-473f-bad3-f5dbbb68731f", + "metadata": {}, + "outputs": [], + "source": [ + "# make values in prop_cat col lower case and remove spaces\n", + "df[\"propulsion_category\"] = df[\"propulsion_category\"].str.lower()\n", + "df[\"propulsion_category\"] = df[\"propulsion_category\"].str.replace(\" \", \"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b78f4274-9b4e-464b-8b59-00882853b6ea", "metadata": {}, "outputs": [ - { - "data": { - "text/plain": [ - "dtype('int64')" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ @@ -344,9 +200,9 @@ " project_title\n", " description\n", " funding\n", - " 
#_of_buses\n", + " approx_#_of_buses\n", " project_type\n", - " propulsion_type\n", + " propulsion_category\n", " area_served\n", " congressional_districts\n", " fta_region\n", @@ -360,7 +216,7 @@ " Washington Metropolitan Area Transit Authority...\n", " Battery-Electric Metrobus Procurement and Elec...\n", " WMATA will receive funding to convert its Cind...\n", - " 104000000\n", + " $104,000,000\n", " 100(beb)\n", " bus/chargers\n", " zero\n", @@ -375,7 +231,7 @@ " Dallas Area Rapid Transit (DART)\n", " DART CNG Bus Fleet Modernization Project\n", " Dallas Area Rapid Transit will receive funding...\n", - " 103000000\n", + " $103,000,000\n", " 90 (estimated-CNG buses)\n", " bus\n", " low\n", @@ -390,7 +246,7 @@ " Southeastern Pennsylvania Transportation Autho...\n", " SEPTA Zero-Emission Bus Transition Facility Sa...\n", " The Southeastern Pennsylvania Transportation A...\n", - " 80000000\n", + " $80,000,000\n", " 0\n", " facility\n", " zero\n", @@ -399,36 +255,6 @@ " 3\n", " Low-No\n", " \n", - " \n", - " 3\n", - " LA\n", - " New Orleans Regional Transit Authority\n", - " Accelerating Zero-Emissions Mobility for a Res...\n", - " The New Orleans Regional Transit Authority wil...\n", - " 71439261\n", - " 20 (zero-emission)\n", - " Bus / Chargers / Equipment\n", - " zero\n", - " Large Urban\n", - " LA-002 ; LA-001\n", - " 6\n", - " Low-No\n", - " \n", - " \n", - " 4\n", - " NJ\n", - " New Jersey Transit Corporation\n", - " Hilton Bus Garage Modernization\n", - " New Jersey Transit will receive funding to mod...\n", - " 47000000\n", - " 0\n", - " facility/chargers\n", - " zero\n", - " Large Urban\n", - " nj-011\n", - " 2\n", - " Bus\n", - " \n", " \n", "\n", "" @@ -438,192 +264,94 @@ "0 DC Washington Metropolitan Area Transit Authority... \n", "1 TX Dallas Area Rapid Transit (DART) \n", "2 PA Southeastern Pennsylvania Transportation Autho... 
\n", - "3 LA New Orleans Regional Transit Authority \n", - "4 NJ New Jersey Transit Corporation \n", "\n", " project_title \\\n", "0 Battery-Electric Metrobus Procurement and Elec... \n", "1 DART CNG Bus Fleet Modernization Project \n", "2 SEPTA Zero-Emission Bus Transition Facility Sa... \n", - "3 Accelerating Zero-Emissions Mobility for a Res... \n", - "4 Hilton Bus Garage Modernization \n", "\n", - " description funding \\\n", - "0 WMATA will receive funding to convert its Cind... 104000000 \n", - "1 Dallas Area Rapid Transit will receive funding... 103000000 \n", - "2 The Southeastern Pennsylvania Transportation A... 80000000 \n", - "3 The New Orleans Regional Transit Authority wil... 71439261 \n", - "4 New Jersey Transit will receive funding to mod... 47000000 \n", + " description funding \\\n", + "0 WMATA will receive funding to convert its Cind... $104,000,000 \n", + "1 Dallas Area Rapid Transit will receive funding... $103,000,000 \n", + "2 The Southeastern Pennsylvania Transportation A... $80,000,000 \n", "\n", - " #_of_buses project_type propulsion_type \\\n", - "0 100(beb) bus/chargers zero \n", - "1 90 (estimated-CNG buses) bus low \n", - "2 0 facility zero \n", - "3 20 (zero-emission) Bus / Chargers / Equipment zero \n", - "4 0 facility/chargers zero \n", + " approx_#_of_buses project_type propulsion_category area_served \\\n", + "0 100(beb) bus/chargers zero Large Urban \n", + "1 90 (estimated-CNG buses) bus low Large Urban \n", + "2 0 facility zero Large Urban \n", "\n", - " area_served congressional_districts fta_region \\\n", - "0 Large Urban DC-001 ; MD-004 ; MD-008 ; VA-008 ; VA-011 3 \n", - "1 Large Urban TX-003 ; TX-004 ; TX-005 ; TX-006 ; TX-024 ; T... 6 \n", - "2 Large Urban PA-002 ; PA-003 ; PA-004 ; PA-005 3 \n", - "3 Large Urban LA-002 ; LA-001 6 \n", - "4 Large Urban nj-011 2 \n", + " congressional_districts fta_region \\\n", + "0 DC-001 ; MD-004 ; MD-008 ; VA-008 ; VA-011 3 \n", + "1 TX-003 ; TX-004 ; TX-005 ; TX-006 ; TX-024 ; T... 
6 \n", + "2 PA-002 ; PA-003 ; PA-004 ; PA-005 3 \n", "\n", " bus/low-no_program \n", "0 Low-No \n", "1 Low-No \n", - "2 Low-No \n", - "3 Low-No \n", - "4 Bus " + "2 Low-No " ] }, + "execution_count": 7, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "# checking to see if str.replace worked.\n", - "display(df[\"funding\"].dtype, df.head())" + "df.head(3)" ] }, { - "cell_type": "code", - "execution_count": 7, - "id": "1fb94754-e795-4e78-8a91-2732566a1792", - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "id": "354cb76f-f71f-4a30-8c85-aece5ac3f0d3", + "metadata": { + "tags": [] + }, "source": [ - "# test of removing the spaces first in # of bus colum, THEN split by (\n", - "df[\"#_of_buses\"] = df[\"#_of_buses\"].str.replace(\" \", \"\")" + "#### funding" ] }, { "cell_type": "code", "execution_count": 8, - "id": "f1203ac6-0247-47f8-9f96-0cac2129556d", + "id": "ff8721be-5cbd-430f-b947-4110c397de23", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "0 34\n", - "7(Electric) 3\n", - "2 3\n", - "4(BEBs) 3\n", - "20(BEBs) 3\n", - "2(electric) 2\n", - "9 2\n", - "16(hybridelectric) 2\n", - "5(CNG) 2\n", - "4(cng) 2\n", - "6 2\n", - "7 2\n", - "5 2\n", - "10(CNG) 1\n", - "4(zeroemissionelectric) 1\n", - "11(CNGfueled) 1\n", - "4(hybridelectric) 1\n", - "25(hybridelectrics) 1\n", - "6(dieselelectrichybrids) 1\n", - "39 1\n", - "9(CNG) 1\n", - "4(hybrid) 1\n", - "10(BEBs) 1\n", - "4 1\n", - "100(beb) 1\n", - "15 1\n", - "9(dieselandgas) 1\n", - "3(1:CNGbus;2cutawayCNGbuses) 1\n", - "1(hybrid) 1\n", - "4(propane) 1\n", - "3(propanedpoweredvehicles) 1\n", - "6(CNG) 1\n", - "5(hybrid) 1\n", - "1(zeroemission) 1\n", - "9(propanebuses) 1\n", - "6(diesel-electrichybrids) 1\n", - "7(propanebuses) 1\n", - "3(diesel-electric) 1\n", - "2(hybrid) 1\n", - "5(cng) 1\n", - "7(hybridelectric) 1\n", - "15(propane) 1\n", - "10(hybridelectricbuses) 1\n", - "7(4fuelcell/3CNG) 1\n", - "11(Electric) 1\n", - "20(BEB) 
1\n", - "8 1\n", - "12(FuelCell) 1\n", - "39(CNG) 1\n", - "18 1\n", - "17(BEBs) 1\n", - "13(BEBs) 1\n", - "25(FuelCellElectric) 1\n", - "23(FCEB) 1\n", - "69 1\n", - "15(Electric)\\n16(Hybrid) 1\n", - "30(BEBs) 1\n", - "16(BEBs) 1\n", - "35(BEBs) 1\n", - "40(cngbuses) 1\n", - "20(zero-emission) 1\n", - "20(hybrid) 1\n", - "13 1\n", - "56estimated-cutawayvans(PM-awardwillnotfund68buses) 1\n", - "12batteryelectric 1\n", - "90(estimated-CNGbuses) 1\n", - "6(2BEBsand4HydrogenFuelCellBuses) 1\n", - "6(hydrogenfuelcell) 1\n", - "11(dieselelectrichybrids) 1\n", - "11 1\n", - "6(BEBs) 1\n", - "8(zeroemissionbuses) 1\n", - "42 1\n", - "50(BEBsparatransitbuses) 1\n", - "14(BEB) 1\n", - "134 1\n", - "37(cng) 1\n", - "160 1\n", - "31 1\n", - "25(lowemissionCNG) 1\n", - "3 1\n", - "Name: #_of_buses, dtype: int64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_762/4154908731.py:8: FutureWarning: The default value of regex will change from True to False in a future version. 
In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + " df[column]= df[column].str.replace(\"$\", \"\")\n" + ] } ], "source": [ - "# spaces removed, and zeros are kept\n", - "df[\"#_of_buses\"].value_counts()" + "fund_cleaner(df,'funding')" ] }, { "cell_type": "code", "execution_count": 9, - "id": "1e682fca-2f4b-4459-a46e-fccce29f2650", + "id": "a40fd940-153f-4d64-96fb-568d130beb34", "metadata": {}, "outputs": [], "source": [ - "# spliting the # of buses column into 2, using the ( char as the delimiter\n", - "df[[\"bus_count\", \"bus_desc\"]] = df[\"#_of_buses\"].str.split(pat=\"(\", n=1, expand=True)" + "df[\"funding\"] = df[\"funding\"].astype(\"int64\")" ] }, { "cell_type": "code", "execution_count": 10, - "id": "156b81ec-4807-4226-97cf-cd36dc39dfd4", + "id": "7dba2d0a-b475-4f2a-a483-2a24a777a46d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['state', 'project_sponsor', 'project_title', 'description', 'funding',\n", - " '#_of_buses', 'project_type', 'propulsion_type', 'area_served',\n", - " 'congressional_districts', 'fta_region', 'bus/low-no_program',\n", - " 'bus_count', 'bus_desc'],\n", + " 'approx_#_of_buses', 'project_type', 'propulsion_category',\n", + " 'area_served', 'congressional_districts', 'fta_region',\n", + " 'bus/low-no_program'],\n", " dtype='object')" ] }, @@ -633,74 +361,56 @@ } ], "source": [ - "# checking col. retained the initial col. 
and added new columns to the end.\n", "df.columns" ] }, + { + "cell_type": "markdown", + "id": "7077380f-1f92-4108-bac1-77db8f79568d", + "metadata": { + "tags": [] + }, + "source": [ + "#### split `approx_#_of_buses` to `bus_count` and `prop_type`" + ] + }, { "cell_type": "code", "execution_count": 11, - "id": "fd20bd24-6df2-4b7d-ad92-2b52fe2c6a41", + "id": "1fb94754-e795-4e78-8a91-2732566a1792", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 34\n", - "4 10\n", - "7 8\n", - "6 8\n", - "20 6\n", - "2 6\n", - "5 6\n", - "9 5\n", - "11 4\n", - "3 4\n", - "16 3\n", - "15 3\n", - "10 3\n", - "25 3\n", - "8 2\n", - "39 2\n", - "1 2\n", - "13 2\n", - "56estimated-cutawayvans 1\n", - "134 1\n", - "42 1\n", - "50 1\n", - "14 1\n", - "100 1\n", - "37 1\n", - "160 1\n", - "31 1\n", - "12batteryelectric 1\n", - "90 1\n", - "18 1\n", - "17 1\n", - "23 1\n", - "69 1\n", - "30 1\n", - "35 1\n", - "40 1\n", - "12 1\n", - "Name: bus_count, dtype: int64" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# examining the new bus count col.\n", - "# zero values remained the same\n", - "# see there are 2 values that are inconsistent.\n", - "df.bus_count.value_counts()" + "# test of removing the spaces first in # of bus colum, THEN split by (\n", + "df[\"approx_#_of_buses\"] = df[\"approx_#_of_buses\"].str.replace(\" \", \"\")" ] }, { "cell_type": "code", "execution_count": 12, + "id": "1e682fca-2f4b-4459-a46e-fccce29f2650", + "metadata": {}, + "outputs": [], + "source": [ + "# spliting the # of buses column into 2, using the ( char as the delimiter\n", + "# also fills `none` values with `needs manual check`\n", + "df[[\"bus_count\", \"prop_type\"]] = df[\"approx_#_of_buses\"].str.split(pat=\"(\", n=1, expand=True)\n", + "df[[\"bus_count\", \"prop_type\"]] = df[[\"bus_count\", \"prop_type\"]].fillna('needs manual check')" + ] + }, + { + "cell_type": "markdown", + "id": 
"7a6e2c90-9da1-4cfb-8032-397baa74579a", + "metadata": { + "tags": [] + }, + "source": [ + "#### bus_count" + ] + }, + { + "cell_type": "code", + "execution_count": 13, "id": "3d3c02c1-6582-4034-b0df-596235ac35fd", "metadata": {}, "outputs": [], @@ -713,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "261cac0b-6427-4420-bbb2-41121574b09e", "metadata": {}, "outputs": [], @@ -724,7 +434,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "887f3bce-9b0e-45a1-93ae-920a3584f466", "metadata": {}, "outputs": [ @@ -753,7 +463,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "46065069-4d01-4f54-a132-a671d50ac9b9", "metadata": {}, "outputs": [], @@ -766,324 +476,218 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "8fe33276-6264-4c6e-80ec-530f315e79ce", "metadata": {}, "outputs": [], "source": [ "# updating values again for bus_desc. same location\n", - "df.loc[58, \"bus_desc\"] = \"estimated-cutaway vans (PM- award will not fund 68 buses)\"\n", - "df.loc[32, \"bus_desc\"] = \"battery electric\"" + "df.loc[58, \"prop_type\"] = \"estimated-cutaway vans (PM- award will not fund 68 buses)\"\n", + "df.loc[32, \"prop_type\"] = \"battery electric\"" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "28fc0972-03ae-4c56-a207-906732deb4ab", + "execution_count": 18, + "id": "7a4d910e-af55-46b0-a92c-aa52e1a892e8", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "state MN\n", - "project_sponsor Metro Transit\n", - "project_title Investments Toward an Electric Future: Metro T...\n", - "description Metro Transit will receive funding to buy batt...\n", - "funding 17532900\n", - "#_of_buses 12batteryelectric\n", - "project_type Bus / Chargers / Equipment\n", - "propulsion_type zero\n", - "area_served Large Urban\n", - "congressional_districts MN-002 ; MN-003 ; MN-004 ; MN-005 ; MN-006\n", - "fta_region 5\n", - 
"bus/low-no_program Low-No\n", - "bus_count 12\n", - "bus_desc battery electric\n", - "Name: 32, dtype: object" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "state TX\n", - "project_sponsor Texas Department of Transportation on behalf o...\n", - "project_title FY23 Rural Transit Asset Replacement & Moderni...\n", - "description The Texas Department of Transportation will re...\n", - "funding 7443765\n", - "#_of_buses 56estimated-cutawayvans(PM-awardwillnotfund68b...\n", - "project_type bus / facilitiy\n", - "propulsion_type low\n", - "area_served Rural\n", - "congressional_districts TX-001 ; TX-002 ; TX-004 ; TX-005 ; TX-006 ; T...\n", - "fta_region 6\n", - "bus/low-no_program Low-No\n", - "bus_count 56\n", - "bus_desc estimated-cutaway vans (PM- award will not fun...\n", - "Name: 58, dtype: object" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "# values updated as inteneded for bus count and bus desc\n", - "display(df.loc[32], df.loc[58])" + "# bus count for row 12 needs to be adjusted to 31 instead of 15\n", + "df.loc[12, \"bus_count\"] = 31" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "5773714f-1df8-403d-9074-9936a47522a3", + "execution_count": 19, + "id": "edfd4b23-e384-47a1-bb7b-461fd04c2d32", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 34\n", - "4 10\n", - "7 8\n", - "6 8\n", - "20 6\n", - "2 6\n", - "5 6\n", - "9 5\n", - "11 4\n", - "3 4\n", - "16 3\n", - "15 3\n", - "10 3\n", - "25 3\n", - "8 2\n", - "39 2\n", - "1 2\n", - "13 2\n", - "56 1\n", - "134 1\n", - "42 1\n", - "50 1\n", - "14 1\n", - "100 1\n", - "37 1\n", - "160 1\n", - "31 1\n", - "12 1\n", - "90 1\n", - "18 1\n", - "17 1\n", - "23 1\n", - "69 1\n", - "30 1\n", - "35 1\n", - "40 1\n", - "12 1\n", - "Name: bus_count, dtype: int64" + "state NC\n", + "project_sponsor City of Charlotte - Charlotte Area Transit System\n", + "project_title Charlotte Area 
Transit System's Sustainable Fl...\n", + "description The city of Charlotte will receive funding to ...\n", + "funding 30890413\n", + "approx_#_of_buses 15(Electric)\\n16(Hybrid)\n", + "project_type Bus / Chargers / Equipment\n", + "propulsion_category zero/low\n", + "area_served Large Urban\n", + "congressional_districts NC-008 ; NC-012 ; NC-013 ; NC-014 ; SC-005\n", + "fta_region 4\n", + "bus/low-no_program Bus\n", + "bus_count 31\n", + "prop_type Electric)\\n16(Hybrid)\n", + "Name: 12, dtype: object" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# confirming via value counts that all values are valid now.\n", - "df.bus_count.value_counts()" + "# confirming the change\n", + "df.loc[12]" ] }, { - "cell_type": "code", - "execution_count": 19, - "id": "09750ef1-1eba-4561-b5ad-b05f2a3b5875", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_673/2239180410.py:3: FutureWarning: The default value of regex will change from True to False in a future version. 
In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", - " df[\"bus_desc\"] = df[\"bus_desc\"].str.replace(\")\", \"\")\n" - ] - } - ], + "cell_type": "markdown", + "id": "58fd7f90-2935-4e58-92c6-4253758ad3c1", + "metadata": { + "tags": [] + }, "source": [ - "# clearning the bus desc col.\n", - "# removing the )\n", - "df[\"bus_desc\"] = df[\"bus_desc\"].str.replace(\")\", \"\")" + "#### project_type" ] }, { "cell_type": "code", "execution_count": 20, - "id": "e298fe1d-ab31-4cae-aa18-309d349b4b78", + "id": "ca77c135-1168-489e-802e-402c614bbb77", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['beb', 'estimated-CNGbuses', None, 'zero-emission', 'cngbuses',\n", - " 'BEBs', 'Electric\\n16(Hybrid', 'FCEB', 'Electric',\n", - " 'FuelCellElectric', 'CNG', 'FuelCell', 'hybrid', 'BEB',\n", - " 'battery electric', 'lowemissionCNG', 'cng',\n", - " 'BEBsparatransitbuses', 'hybridelectric', 'zeroemissionbuses',\n", - " 'dieselelectrichybrids', 'hydrogenfuelcell',\n", - " '2BEBsand4HydrogenFuelCellBuses', '4fuelcell/3CNG',\n", - " 'estimated-cutaway vans (PM- award will not fund 68 buses',\n", - " 'hybridelectricbuses', 'CNGfueled', 'zeroemissionelectric',\n", - " 'hybridelectrics', 'dieselandgas', 'diesel-electrichybrids',\n", - " 'propane', 'electric', 'diesel-electric', 'propanebuses',\n", - " '1:CNGbus;2cutawayCNGbuses', 'zeroemission',\n", - " 'propanedpoweredvehicles'], dtype=object)" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "df[\"bus_desc\"].unique()" + "# using str.lower() on project type\n", + "df[\"project_type\"] = df[\"project_type\"].str.lower()\n", + "# removing spaces from project type\n", + "df[\"project_type\"] = df[\"project_type\"].str.replace(\" \", \"\")" ] }, { "cell_type": "code", "execution_count": 21, - "id": "03f52229-29ad-4551-aa70-a9d53124d61e", + "id": 
"2014394c-33d9-44e9-b162-ab21988d9e8d", "metadata": {}, "outputs": [], "source": [ - "# stripping the values in the bus desc col\n", - "df[\"bus_desc\"] = df[\"bus_desc\"].str.strip()" + "# some values still need to get adjusted. will use a short dictionary to fix\n", + "new_type = {\n", + " \"\\tbus/facility\": \"bus/facility\",\n", + " \"bus/facilitiy\": \"bus/facility\",\n", + " \"facilities\": \"facility\",\n", + "}" ] }, { "cell_type": "code", "execution_count": 22, - "id": "9202f2a1-f309-4b86-8602-6989fe4c577a", + "id": "a6b0eeb6-cb94-4bb8-82ac-ba3c3763a3aa", "metadata": {}, - "outputs": [ + "outputs": [], + "source": [ + "# using replace() with the dictionary to replace keys in project type col\n", + "# syntax df.replace({'bus_desc': new_dict}, inplace=True)\n", + "df.replace({\"project_type\": new_type}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "52eb4fce-7c4d-4253-893c-73c59c2ecff5", + "metadata": {}, + "outputs": [ { "data": { "text/plain": [ - "array(['beb', 'estimated-CNGbuses', None, 'zero-emission', 'cngbuses',\n", - " 'BEBs', 'Electric\\n16(Hybrid', 'FCEB', 'Electric',\n", - " 'FuelCellElectric', 'CNG', 'FuelCell', 'hybrid', 'BEB',\n", - " 'battery electric', 'lowemissionCNG', 'cng',\n", - " 'BEBsparatransitbuses', 'hybridelectric', 'zeroemissionbuses',\n", - " 'dieselelectrichybrids', 'hydrogenfuelcell',\n", - " '2BEBsand4HydrogenFuelCellBuses', '4fuelcell/3CNG',\n", - " 'estimated-cutaway vans (PM- award will not fund 68 buses',\n", - " 'hybridelectricbuses', 'CNGfueled', 'zeroemissionelectric',\n", - " 'hybridelectrics', 'dieselandgas', 'diesel-electrichybrids',\n", - " 'propane', 'electric', 'diesel-electric', 'propanebuses',\n", - " '1:CNGbus;2cutawayCNGbuses', 'zeroemission',\n", - " 'propanedpoweredvehicles'], dtype=object)" + "array(['bus/chargers', 'bus', 'facility', 'bus/chargers/equipment',\n", + " 'facility/chargers', 'bus/facility', 'bus/facility/chargers',\n", + " 'chargers', 
'bus/chargers/other', 'bus/facility/equipment',\n", + " 'bus/equipment', 'bus/facility/chargers/equipment',\n", + " 'bus/facility/other', 'facility/chargers/equipment',\n", + " 'facility/equipment', 'chargers/equipment', 'bus/other',\n", + " 'bus/facility/equipment/other'], dtype=object)" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.bus_desc.unique()" + "df.project_type.unique()" ] }, { - "cell_type": "code", - "execution_count": 23, - "id": "bba62d67-35ab-4334-898b-f7581b40c574", - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "id": "dd2125a9-1117-4baa-b704-1aad30249e6d", + "metadata": { + "tags": [] + }, "source": [ - "# creating a dictionary to add spaces back to the values\n", - "new_dict = {\n", - " \"beb\": \"BEB\",\n", - " \"estimated-CNGbuses\": \"estimated-CNG buses\",\n", - " \"cngbuses\": \"CNG buses\",\n", - " \"BEBs\": \"BEB\",\n", - " \"Electric\\n16(Hybrid\": \"15 electic, 16 hybrid\",\n", - " \"FuelCellElectric\": \"fuel cell electric\",\n", - " \"FuelCell\": \"fuel cell\",\n", - " \"lowemissionCNG\": \"low emission CNG\",\n", - " \"cng\": \"CNG\",\n", - " \"BEBsparatransitbuses\": \"BEBs paratransit buses\",\n", - " \"hybridelectric\": \"hybrid electric\",\n", - " \"zeroemissionbuses\": \"zero emission buses\",\n", - " \"dieselelectrichybrids\": \"diesel electric hybrids\",\n", - " \"hydrogenfuelcell\": \"hydrogen fuel cell\",\n", - " \"2BEBsand4HydrogenFuelCellBuses\": \"2 BEBs and 4 hydrogen fuel cell buses\",\n", - " \"4fuelcell/3CNG\": \"4 fuel cell / 3 CNG\",\n", - " \"hybridelectricbuses\": \"hybrid electric buses\",\n", - " \"CNGfueled\": \"CNG fueled\",\n", - " \"zeroemissionelectric\": \"zero emission electric\",\n", - " \"hybridelectrics\": \"hybrid electrics\",\n", - " \"dieselandgas\": \"diesel and gas\",\n", - " \"diesel-electrichybrids\": \"diesel-electric hybrids\",\n", - " \"propanebuses\": \"propane buses\",\n", - " 
\"1:CNGbus;2cutawayCNGbuses\": \"1:CNGbus ;2 cutaway CNG buses\",\n", - " \"zeroemission\": \"zero emission\",\n", - " \"propanedpoweredvehicles\": \"propaned powered vehicles\",\n", - "}" + "#### `prop_type`" ] }, { "cell_type": "code", "execution_count": 24, - "id": "c883651a-7959-40ec-8bfe-2935f7f8e312", + "id": "09750ef1-1eba-4561-b5ad-b05f2a3b5875", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_762/2225036280.py:3: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + " df[\"prop_type\"] = df[\"prop_type\"].str.replace(\")\", \"\")\n" + ] + } + ], "source": [ - "# using new dictionary to replace values in the bus desc col\n", - "df.replace({\"bus_desc\": new_dict}, inplace=True)" + "# clearning the bus desc/prop_type col.\n", + "# removing the )\n", + "df[\"prop_type\"] = df[\"prop_type\"].str.replace(\")\", \"\")" ] }, { "cell_type": "code", "execution_count": 25, - "id": "3bf65796-704b-4309-853e-08c9d1159bee", + "id": "e298fe1d-ab31-4cae-aa18-309d349b4b78", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['BEB',\n", - " 'estimated-CNG buses',\n", - " None,\n", + "['beb',\n", + " 'estimated-CNGbuses',\n", + " 'needs manual check',\n", " 'zero-emission',\n", - " 'CNG buses',\n", - " '15 electic, 16 hybrid',\n", + " 'cngbuses',\n", + " 'BEBs',\n", + " 'Electric\\n16(Hybrid',\n", " 'FCEB',\n", " 'Electric',\n", - " 'fuel cell electric',\n", + " 'FuelCellElectric',\n", " 'CNG',\n", - " 'fuel cell',\n", + " 'FuelCell',\n", " 'hybrid',\n", + " 'BEB',\n", " 'battery electric',\n", - " 'low emission CNG',\n", - " 'BEBs paratransit buses',\n", - " 'hybrid electric',\n", - " 'zero emission buses',\n", - " 'diesel electric hybrids',\n", - " 'hydrogen fuel cell',\n", - " '2 BEBs and 4 hydrogen fuel cell buses',\n", - " '4 fuel 
cell / 3 CNG',\n", + " 'lowemissionCNG',\n", + " 'cng',\n", + " 'BEBsparatransitbuses',\n", + " 'hybridelectric',\n", + " 'zeroemissionbuses',\n", + " 'dieselelectrichybrids',\n", + " 'hydrogenfuelcell',\n", + " '2BEBsand4HydrogenFuelCellBuses',\n", + " '4fuelcell/3CNG',\n", " 'estimated-cutaway vans (PM- award will not fund 68 buses',\n", - " 'hybrid electric buses',\n", - " 'CNG fueled',\n", - " 'zero emission electric',\n", - " 'hybrid electrics',\n", - " 'diesel and gas',\n", - " 'diesel-electric hybrids',\n", + " 'hybridelectricbuses',\n", + " 'CNGfueled',\n", + " 'zeroemissionelectric',\n", + " 'hybridelectrics',\n", + " 'dieselandgas',\n", + " 'diesel-electrichybrids',\n", " 'propane',\n", " 'electric',\n", " 'diesel-electric',\n", - " 'propane buses',\n", - " '1:CNGbus ;2 cutaway CNG buses',\n", - " 'zero emission',\n", - " 'propaned powered vehicles']" + " 'propanebuses',\n", + " '1:CNGbus;2cutawayCNGbuses',\n", + " 'zeroemission',\n", + " 'propanedpoweredvehicles']" ] }, "execution_count": 25, @@ -1092,191 +696,73 @@ } ], "source": [ - "# confirming the bus desc values were replaced as indeded.\n", - "list(df.bus_desc.unique())" + "list(df[\"prop_type\"].unique())" ] }, { "cell_type": "code", "execution_count": 26, - "id": "7a4d910e-af55-46b0-a92c-aa52e1a892e8", + "id": "03f52229-29ad-4551-aa70-a9d53124d61e", "metadata": {}, "outputs": [], "source": [ - "# bus count for row 12 needs to be adjusted to 31 instead of 15\n", - "df.loc[12, \"bus_count\"] = 31" + "# stripping the values in the bus desc col\n", + "df[\"prop_type\"] = df[\"prop_type\"].str.strip()" ] }, { "cell_type": "code", "execution_count": 27, - "id": "edfd4b23-e384-47a1-bb7b-461fd04c2d32", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "state NC\n", - "project_sponsor City of Charlotte - Charlotte Area Transit System\n", - "project_title Charlotte Area Transit System's Sustainable Fl...\n", - "description The city of Charlotte will receive funding to ...\n", - "funding 
30890413\n", - "#_of_buses 15(Electric)\\n16(Hybrid)\n", - "project_type Bus / Chargers / Equipment\n", - "propulsion_type Zero / Low\n", - "area_served Large Urban\n", - "congressional_districts NC-008 ; NC-012 ; NC-013 ; NC-014 ; SC-005\n", - "fta_region 4\n", - "bus/low-no_program Bus\n", - "bus_count 31\n", - "bus_desc 15 electic, 16 hybrid\n", - "Name: 12, dtype: object" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# confirming the change\n", - "df.loc[12]" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "ca77c135-1168-489e-802e-402c614bbb77", - "metadata": {}, - "outputs": [], - "source": [ - "# using str.lower() on project type\n", - "df[\"project_type\"] = df[\"project_type\"].str.lower()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "3d9e2fe8-1f28-4d14-8a6d-f286a4641b2d", - "metadata": {}, - "outputs": [], - "source": [ - "# using str.lower() on project type\n", - "df[\"project_type\"] = df[\"project_type\"].str.replace(\" \", \"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "cd50c69d-5fad-4a9b-b88a-87d0191778fe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['\\tbus/facility',\n", - " 'bus',\n", - " 'bus/chargers',\n", - " 'bus/chargers/equipment',\n", - " 'bus/chargers/other',\n", - " 'bus/equipment',\n", - " 'bus/facilitiy',\n", - " 'bus/facility',\n", - " 'bus/facility/chargers',\n", - " 'bus/facility/chargers/equipment',\n", - " 'bus/facility/equipment',\n", - " 'bus/facility/equipment/other',\n", - " 'bus/facility/other',\n", - " 'bus/other',\n", - " 'chargers',\n", - " 'chargers/equipment',\n", - " 'facilities',\n", - " 'facility',\n", - " 'facility/chargers',\n", - " 'facility/chargers/equipment',\n", - " 'facility/equipment']" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# confirming lower and replace worked as 
intended\n", - "list(df[\"project_type\"].sort_values(ascending=True).unique())" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "2014394c-33d9-44e9-b162-ab21988d9e8d", + "id": "bba62d67-35ab-4334-898b-f7581b40c574", "metadata": {}, "outputs": [], "source": [ - "# some values still need to get adjusted. will use a short dictionary to fix\n", - "new_type = {\n", - " \"\\tbus/facility\": \"bus/facility\",\n", - " \"bus/facilitiy\": \"bus/facility\",\n", - " \"facilities\": \"facility\",\n", + "# creating a dictionary to add spaces back to the values\n", + "spaces = {\n", + " \"beb\": \"BEB\",\n", + " \"estimated-CNGbuses\": \"estimated-CNG buses\",\n", + " \"cngbuses\": \"CNG buses\",\n", + " \"BEBs\": \"BEB\",\n", + " \"Electric\\n16(Hybrid\": \"15 electic, 16 hybrid\",\n", + " \"FuelCellElectric\": \"fuel cell electric\",\n", + " \"FuelCell\": \"fuel cell\",\n", + " \"lowemissionCNG\": \"low emission CNG\",\n", + " \"cng\": \"CNG\",\n", + " \"BEBsparatransitbuses\": \"BEBs paratransit buses\",\n", + " \"hybridelectric\": \"hybrid electric\",\n", + " \"zeroemissionbuses\": \"zero emission buses\",\n", + " \"dieselelectrichybrids\": \"diesel electric hybrids\",\n", + " \"hydrogenfuelcell\": \"hydrogen fuel cell\",\n", + " \"2BEBsand4HydrogenFuelCellBuses\": \"2 BEBs and 4 hydrogen fuel cell buses\",\n", + " \"4fuelcell/3CNG\": \"4 fuel cell / 3 CNG\",\n", + " \"hybridelectricbuses\": \"hybrid electric buses\",\n", + " \"CNGfueled\": \"CNG fueled\",\n", + " \"zeroemissionelectric\": \"zero emission electric\",\n", + " \"hybridelectrics\": \"hybrid electrics\",\n", + " \"dieselandgas\": \"diesel and gas\",\n", + " \"diesel-electrichybrids\": \"diesel-electric hybrids\",\n", + " \"propanebuses\": \"propane buses\",\n", + " \"1:CNGbus;2cutawayCNGbuses\": \"1:CNGbus ;2 cutaway CNG buses\",\n", + " \"zeroemission\": \"zero emission\",\n", + " \"propanedpoweredvehicles\": \"propaned powered vehicles\",\n", "}" ] }, { "cell_type": "code", - 
"execution_count": 32, - "id": "a6b0eeb6-cb94-4bb8-82ac-ba3c3763a3aa", + "execution_count": 28, + "id": "c883651a-7959-40ec-8bfe-2935f7f8e312", "metadata": {}, "outputs": [], "source": [ - "# using replace() with the dictionary to replace keys in project type col\n", - "# syntax df.replace({'bus_desc': new_dict}, inplace=True)\n", - "df.replace({\"project_type\": new_type}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "7bb2161f-c0c6-43c8-a2b5-4643eea6f2a6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bus',\n", - " 'bus/chargers',\n", - " 'bus/chargers/equipment',\n", - " 'bus/chargers/other',\n", - " 'bus/equipment',\n", - " 'bus/facility',\n", - " 'bus/facility/chargers',\n", - " 'bus/facility/chargers/equipment',\n", - " 'bus/facility/equipment',\n", - " 'bus/facility/equipment/other',\n", - " 'bus/facility/other',\n", - " 'bus/other',\n", - " 'chargers',\n", - " 'chargers/equipment',\n", - " 'facility',\n", - " 'facility/chargers',\n", - " 'facility/chargers/equipment',\n", - " 'facility/equipment']" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# double checking to ensure dictionary reaplce works.\n", - "list(df[\"project_type\"].sort_values(ascending=True).unique())" + "# using new dictionary to replace values in the bus desc col\n", + "df.replace({\"prop_type\": spaces}, inplace=True)" ] }, { "cell_type": "code", - "execution_count": 34, - "id": "8fbe2161-8eb1-4179-9af1-e566e909314b", + "execution_count": 29, + "id": "3cb53f35-57e0-45cc-a58c-92a294e3bdbc", "metadata": {}, "outputs": [ { @@ -1309,1960 +795,388 @@ " 'hybrid electrics',\n", " 'hydrogen fuel cell',\n", " 'low emission CNG',\n", + " 'needs manual check',\n", " 'propane',\n", " 'propane buses',\n", " 'propaned powered vehicles',\n", " 'zero emission',\n", " 'zero emission buses',\n", " 'zero emission electric',\n", - " 'zero-emission',\n", - " None]" + " 
'zero-emission']" ] }, - "execution_count": 34, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" - } - ], - "source": [ - "## Cleaning up the bus_desc col\n", - "list(df.bus_desc.sort_values().unique())" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "180f7556-f7a1-423a-9988-36230793befc", - "metadata": {}, - "outputs": [], - "source": [ - "bus_dict = {\n", - " \"BEBs paratransit buses\": \"BEB\",\n", - " \"CNG buses\": \"CNG\",\n", - " \"CNG fueled\": \"CNG\",\n", - " \"Electric\": \"electrc (not specified)\",\n", - " \"battery electric\": \"BEB\",\n", - " \"diesel electric hybrids\": \"diesel-electric hybrids\",\n", - " \"diesel-electric\": \"diesel-electric hybrids\",\n", - " \"electric\": \"electrc (not specified)\",\n", - " \"estimated-CNG buses\": \"CNG\",\n", - " \"fuel cell\": \"FCEB\",\n", - " \"fuel cell electric\": \"FCEB\",\n", - " \"hybrid\": \"hybrid electric\",\n", - " \"hybrid electric buses\": \"hybrid electric\",\n", - " \"hybrid electrics\": \"hybrid electric\",\n", - " \"low emission CNG\": \"CNG\",\n", - " \"propane buses\": \"propane\",\n", - " \"propaned powered vehicles\": \"propane\",\n", - " \"zero emission\": \"zero-emission bus (not specified)\",\n", - " \"zero emission buses\": \"zero-emission bus (not specified)\",\n", - " \"zero emission electric\": \"zero-emission bus (not specified)\",\n", - " \"zero-emission\": \"zero-emission bus (not specified)\",\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "90e68ec0-2da5-4d7d-8804-c6e7f2fb97a6", - "metadata": {}, - "outputs": [], - "source": [ - "# repalcing values in bus_desc with bus_dict dictionary\n", - "df.replace({\"bus_desc\": bus_dict}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "23e0ccf3-c762-4be8-9db3-494317d229cf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['BEB',\n", - " 'CNG',\n", - " None,\n", - " 'zero-emission bus (not 
specified)',\n", - " '15 electic, 16 hybrid',\n", - " 'FCEB',\n", - " 'electrc (not specified)',\n", - " 'hybrid electric',\n", - " 'diesel-electric hybrids',\n", - " 'hydrogen fuel cell',\n", - " '2 BEBs and 4 hydrogen fuel cell buses',\n", - " '4 fuel cell / 3 CNG',\n", - " 'estimated-cutaway vans (PM- award will not fund 68 buses',\n", - " 'diesel and gas',\n", - " 'propane',\n", - " '1:CNGbus ;2 cutaway CNG buses']" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# list of unique bus desc values reduced.\n", - "list(df.bus_desc.unique())" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "0ab57cb2-32eb-457d-983e-650c3323d881", - "metadata": {}, - "outputs": [], - "source": [ - "# rename bus_desc col to propulsion_type\n", - "df = df.rename(columns={\"bus_desc\": \"bus_type\"})" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "6fd15f8c-8ae3-47b3-97b8-bdd6f9ee7290", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['state', 'project_sponsor', 'project_title', 'description', 'funding',\n", - " '#_of_buses', 'project_type', 'propulsion_type', 'area_served',\n", - " 'congressional_districts', 'fta_region', 'bus/low-no_program',\n", - " 'bus_count', 'bus_type'],\n", - " dtype='object')" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# confirm column was renamed\n", - "df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "6873bdc5-30ff-4350-bd6b-e6d0bb16fb05", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['zero',\n", - " 'low',\n", - " 'Low',\n", - " 'Zero',\n", - " 'Zero / Low',\n", - " 'combined',\n", - " 'Traditional',\n", - " 'zero/traditional',\n", - " 'Zero / Low / Traditional',\n", - " 'zero / low',\n", - " 'Zero / Traditional',\n", - " 'zero/low/traditional',\n", - " 'low/traditional',\n", - " 'other',\n", - " 
'Other']" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "## checking existing propulsion_type column\n", - "list(df.propulsion_type.unique())" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "6d7d9992-fc2c-473f-bad3-f5dbbb68731f", - "metadata": {}, - "outputs": [], - "source": [ - "# make values in prop_type col lower case and remove spaces\n", - "df[\"propulsion_type\"] = df[\"propulsion_type\"].str.lower()\n", - "df[\"propulsion_type\"] = df[\"propulsion_type\"].str.replace(\" \", \"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "f2d674e6-5f5d-4b80-a273-a862862636d0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['zero',\n", - " 'low',\n", - " 'zero/low',\n", - " 'combined',\n", - " 'traditional',\n", - " 'zero/traditional',\n", - " 'zero/low/traditional',\n", - " 'low/traditional',\n", - " 'other']" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(df.propulsion_type.unique())" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "e8e5e56e-18a4-4469-bbab-c4136f4fee34", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['BEB', 'CNG', None, 'zero-emission bus (not specified)',\n", - " '15 electic, 16 hybrid', 'FCEB', 'electrc (not specified)',\n", - " 'hybrid electric', 'diesel-electric hybrids', 'hydrogen fuel cell',\n", - " '2 BEBs and 4 hydrogen fuel cell buses', '4 fuel cell / 3 CNG',\n", - " 'estimated-cutaway vans (PM- award will not fund 68 buses',\n", - " 'diesel and gas', 'propane', '1:CNGbus ;2 cutaway CNG buses'],\n", - " dtype=object)" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.bus_type.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "4af08b06-7b8a-4411-9243-4bdf0b6ff87a", - "metadata": {}, - "outputs": [ - { 
- "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
stateproject_sponsorproject_titledescriptionfunding#_of_busesproject_typepropulsion_typearea_servedcongressional_districtsfta_regionbus/low-no_programbus_countbus_type
0DCWashington Metropolitan Area Transit Authority...Battery-Electric Metrobus Procurement and Elec...WMATA will receive funding to convert its Cind...104000000100(beb)bus/chargerszeroLarge UrbanDC-001 ; MD-004 ; MD-008 ; VA-008 ; VA-0113Low-No100BEB
1TXDallas Area Rapid Transit (DART)DART CNG Bus Fleet Modernization ProjectDallas Area Rapid Transit will receive funding...10300000090(estimated-CNGbuses)buslowLarge UrbanTX-003 ; TX-004 ; TX-005 ; TX-006 ; TX-024 ; T...6Low-No90CNG
2PASoutheastern Pennsylvania Transportation Autho...SEPTA Zero-Emission Bus Transition Facility Sa...The Southeastern Pennsylvania Transportation A...800000000facilityzeroLarge UrbanPA-002 ; PA-003 ; PA-004 ; PA-0053Low-No0None
3LANew Orleans Regional Transit AuthorityAccelerating Zero-Emissions Mobility for a Res...The New Orleans Regional Transit Authority wil...7143926120(zero-emission)bus/chargers/equipmentzeroLarge UrbanLA-002 ; LA-0016Low-No20zero-emission bus (not specified)
4NJNew Jersey Transit CorporationHilton Bus Garage ModernizationNew Jersey Transit will receive funding to mod...470000000facility/chargerszeroLarge Urbannj-0112Bus0None
\n", - "
" - ], - "text/plain": [ - " state project_sponsor \\\n", - "0 DC Washington Metropolitan Area Transit Authority... \n", - "1 TX Dallas Area Rapid Transit (DART) \n", - "2 PA Southeastern Pennsylvania Transportation Autho... \n", - "3 LA New Orleans Regional Transit Authority \n", - "4 NJ New Jersey Transit Corporation \n", - "\n", - " project_title \\\n", - "0 Battery-Electric Metrobus Procurement and Elec... \n", - "1 DART CNG Bus Fleet Modernization Project \n", - "2 SEPTA Zero-Emission Bus Transition Facility Sa... \n", - "3 Accelerating Zero-Emissions Mobility for a Res... \n", - "4 Hilton Bus Garage Modernization \n", - "\n", - " description funding \\\n", - "0 WMATA will receive funding to convert its Cind... 104000000 \n", - "1 Dallas Area Rapid Transit will receive funding... 103000000 \n", - "2 The Southeastern Pennsylvania Transportation A... 80000000 \n", - "3 The New Orleans Regional Transit Authority wil... 71439261 \n", - "4 New Jersey Transit will receive funding to mod... 47000000 \n", - "\n", - " #_of_buses project_type propulsion_type \\\n", - "0 100(beb) bus/chargers zero \n", - "1 90(estimated-CNGbuses) bus low \n", - "2 0 facility zero \n", - "3 20(zero-emission) bus/chargers/equipment zero \n", - "4 0 facility/chargers zero \n", - "\n", - " area_served congressional_districts fta_region \\\n", - "0 Large Urban DC-001 ; MD-004 ; MD-008 ; VA-008 ; VA-011 3 \n", - "1 Large Urban TX-003 ; TX-004 ; TX-005 ; TX-006 ; TX-024 ; T... 
6 \n", - "2 Large Urban PA-002 ; PA-003 ; PA-004 ; PA-005 3 \n", - "3 Large Urban LA-002 ; LA-001 6 \n", - "4 Large Urban nj-011 2 \n", - "\n", - " bus/low-no_program bus_count bus_type \n", - "0 Low-No 100 BEB \n", - "1 Low-No 90 CNG \n", - "2 Low-No 0 None \n", - "3 Low-No 20 zero-emission bus (not specified) \n", - "4 Bus 0 None " - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "markdown", - "id": "8a498a0b-4552-4c38-90cd-6e1c4045d40d", - "metadata": {}, - "source": [ - "### Need new column for bus size type via list and function\n", - "cutaway, 40ft etc" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "6053159a-62b1-4866-a13e-47fabf4576b4", - "metadata": {}, - "outputs": [], - "source": [ - "bus_size = [\n", - " \"standard\",\n", - " \"40 foot\",\n", - " \"40-foot\",\n", - " \"40ft\",\n", - " \"articulated\",\n", - " \"cutaway\",\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "3500afae-6358-435f-9d25-1854b9f16634", - "metadata": {}, - "outputs": [], - "source": [ - "# Function to match keywords\n", - "def find_bus_size_type(description):\n", - " for keyword in bus_size:\n", - " if keyword in description.lower():\n", - " return keyword\n", - " return \"not specified\"" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "b7a0ae00-3a85-4e95-ae94-7ffeb55c3a8d", - "metadata": {}, - "outputs": [], - "source": [ - "df[\"bus_size_type\"] = df[\"description\"].apply(find_bus_size_type)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "7f5bbc73-5656-4133-8bf0-5cc43319380d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['state', 'project_sponsor', 'project_title', 'description', 'funding',\n", - " '#_of_buses', 'project_type', 'propulsion_type', 'area_served',\n", - " 'congressional_districts', 'fta_region', 'bus/low-no_program',\n", - " 'bus_count', 
'bus_type', 'bus_size_type'],\n", - " dtype='object')" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "array(['not specified', 'cutaway'], dtype=object)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
stateproject_sponsorproject_titledescriptionfunding#_of_busesproject_typepropulsion_typearea_servedcongressional_districtsfta_regionbus/low-no_programbus_countbus_typebus_size_type
0DCWashington Metropolitan Area Transit Authority...Battery-Electric Metrobus Procurement and Elec...WMATA will receive funding to convert its Cind...104000000100(beb)bus/chargerszeroLarge UrbanDC-001 ; MD-004 ; MD-008 ; VA-008 ; VA-0113Low-No100BEBnot specified
1TXDallas Area Rapid Transit (DART)DART CNG Bus Fleet Modernization ProjectDallas Area Rapid Transit will receive funding...10300000090(estimated-CNGbuses)buslowLarge UrbanTX-003 ; TX-004 ; TX-005 ; TX-006 ; TX-024 ; T...6Low-No90CNGnot specified
2PASoutheastern Pennsylvania Transportation Autho...SEPTA Zero-Emission Bus Transition Facility Sa...The Southeastern Pennsylvania Transportation A...800000000facilityzeroLarge UrbanPA-002 ; PA-003 ; PA-004 ; PA-0053Low-No0Nonenot specified
3LANew Orleans Regional Transit AuthorityAccelerating Zero-Emissions Mobility for a Res...The New Orleans Regional Transit Authority wil...7143926120(zero-emission)bus/chargers/equipmentzeroLarge UrbanLA-002 ; LA-0016Low-No20zero-emission bus (not specified)not specified
4NJNew Jersey Transit CorporationHilton Bus Garage ModernizationNew Jersey Transit will receive funding to mod...470000000facility/chargerszeroLarge Urbannj-0112Bus0Nonenot specified
\n", - "
" - ], - "text/plain": [ - " state project_sponsor \\\n", - "0 DC Washington Metropolitan Area Transit Authority... \n", - "1 TX Dallas Area Rapid Transit (DART) \n", - "2 PA Southeastern Pennsylvania Transportation Autho... \n", - "3 LA New Orleans Regional Transit Authority \n", - "4 NJ New Jersey Transit Corporation \n", - "\n", - " project_title \\\n", - "0 Battery-Electric Metrobus Procurement and Elec... \n", - "1 DART CNG Bus Fleet Modernization Project \n", - "2 SEPTA Zero-Emission Bus Transition Facility Sa... \n", - "3 Accelerating Zero-Emissions Mobility for a Res... \n", - "4 Hilton Bus Garage Modernization \n", - "\n", - " description funding \\\n", - "0 WMATA will receive funding to convert its Cind... 104000000 \n", - "1 Dallas Area Rapid Transit will receive funding... 103000000 \n", - "2 The Southeastern Pennsylvania Transportation A... 80000000 \n", - "3 The New Orleans Regional Transit Authority wil... 71439261 \n", - "4 New Jersey Transit will receive funding to mod... 47000000 \n", - "\n", - " #_of_buses project_type propulsion_type \\\n", - "0 100(beb) bus/chargers zero \n", - "1 90(estimated-CNGbuses) bus low \n", - "2 0 facility zero \n", - "3 20(zero-emission) bus/chargers/equipment zero \n", - "4 0 facility/chargers zero \n", - "\n", - " area_served congressional_districts fta_region \\\n", - "0 Large Urban DC-001 ; MD-004 ; MD-008 ; VA-008 ; VA-011 3 \n", - "1 Large Urban TX-003 ; TX-004 ; TX-005 ; TX-006 ; TX-024 ; T... 
6 \n", - "2 Large Urban PA-002 ; PA-003 ; PA-004 ; PA-005 3 \n", - "3 Large Urban LA-002 ; LA-001 6 \n", - "4 Large Urban nj-011 2 \n", - "\n", - " bus/low-no_program bus_count bus_type \\\n", - "0 Low-No 100 BEB \n", - "1 Low-No 90 CNG \n", - "2 Low-No 0 None \n", - "3 Low-No 20 zero-emission bus (not specified) \n", - "4 Bus 0 None \n", - "\n", - " bus_size_type \n", - "0 not specified \n", - "1 not specified \n", - "2 not specified \n", - "3 not specified \n", - "4 not specified " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display(df.columns, df.bus_size_type.unique(), df.head())" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "id": "e5d6ecca-9381-457d-8b7e-8260d60a7445", - "metadata": {}, - "outputs": [], - "source": [ - "## new column for extracted_propulsion_type\n", - "propulsion_list = [\n", - " \"battery-electric\",\n", - " \"Battery electric\",\n", - " \"Battery-Electric\",\n", - " \"Fuel cell electric\",\n", - " \"Wired electric\",\n", - " \"hydrogen fuel cell\",\n", - " \"cng\",\n", - " \"CNG\",\n", - " \"Propane\",\n", - " \"conventional\",\n", - " \"electric hybrid\",\n", - " \"Compressed natural gas\",\n", - " \"Hybrid\",\n", - " \"Hybrid electric\",\n", - " \"Hybrid-electric\",\n", - " \"Zero emission\",\n", - " \"Zero-emission\"\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "id": "3e039ec1-2e1a-423c-89ea-4e3edde26bcd", - "metadata": {}, - "outputs": [], - "source": [ - "# function\n", - "def find_propulsion_type(description):\n", - " for keyword in propulsion_list:\n", - " if keyword.lower() in description.lower():\n", - " return keyword\n", - " return \"not specified\"" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "id": "4c4f4da5-60e0-4881-a718-989d2e925607", - "metadata": {}, - "outputs": [], - "source": [ - "df[\"extracted_propulsion_type\"] = df[\"description\"].apply(find_propulsion_type)" - ] - }, - { - "cell_type": "code", - 
"execution_count": 116, - "id": "d4bee3cf-e14b-4479-8108-d0f44756c310", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['state', 'project_sponsor', 'project_title', 'description', 'funding',\n", - " '#_of_buses', 'project_type', 'propulsion_type', 'area_served',\n", - " 'congressional_districts', 'fta_region', 'bus/low-no_program',\n", - " 'bus_count', 'bus_type', 'bus_size_type', 'extracted_propulsion_type'],\n", - " dtype='object')" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "not specified 40\n", - "battery-electric 33\n", - "Compressed natural gas 12\n", - "Hybrid 12\n", - "Zero-emission 9\n", - "Propane 6\n", - "cng 5\n", - "electric hybrid 5\n", - "Zero emission 3\n", - "Battery electric 3\n", - "Fuel cell electric 2\n", - "Name: extracted_propulsion_type, dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display(\n", - " df.columns,\n", - " df.extracted_propulsion_type.value_counts(),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 117, - "id": "384488e9-cc52-42eb-bc3b-9a00bc2e58b2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
stateproject_sponsorproject_titledescriptionfunding#_of_busesproject_typepropulsion_typearea_servedcongressional_districtsfta_regionbus/low-no_programbus_countbus_typebus_size_typeextracted_propulsion_type
9OHMETRO Regional Transit AuthorityAkron METRO RTA Maintenance and Operations Fac...The METRO Regional Transit Authority will rece...378081130facilitylowLarge UrbanOH-0135Bus0Nonenot specifiednot specified
15CANorth County Transit District (NCTD)Accelerate Clean Transit (ACT)The North County Transit District will receive...2933024323(FCEB)buszeroLarge UrbanCA-049 ; CA-0509Low-No23FCEBnot specifiednot specified
22IACity of Iowa CityIowa City Zero-Emission Transit Operations Mai...Iowa City will receive funding to buy electric...232805464(BEBs)bus/facility/chargerszeroSmall Urbania-0017Low-No4BEBnot specifiednot specified
24GAGeorgia State UniversityCollege Town, Downtown: Transitioning to an Al...Georgia State University's Panther Express wil...2228674518bus/facility/chargerszeroLarge UrbanGA-0054Low-No18Nonenot specifiednot specified
35FLCity of OcalaElectric Bus Vehicle Purchase and Expansion of...The city of Ocala's SunTran transit system wil...1616682231bus/facility/chargers/equipmentzeroSmall UrbanFL-003 ; FL-0064Low-No31Nonecutawaynot specified
36SCSouth Carolina Department of Transportation on...SCDOT Vehicle Replacement ProjectThe South Carolina Department of Transportatio...15423904160bustraditionalRuralSC-All ; SC-001 ; SC-002 ; SC-003 ; SC-004 ; S...4Bus160Nonenot specifiednot specified
39ILIllinois Department of Transportation on behal...Illinois DOT Statewide Paratransit Vehicle Rep...The Illinois Department of Transportation will...12600000134bustraditionalstatewideIL-002 ; IL-011 ; IL-012 ; IL-013 ; IL-014 ; I...5Bus134Nonecutawaynot specified
42KYKentucky Transportation Cabinet on behalf of 1...Consolidated Proposal for 10 Transit Agencies ...The Kentucky Transportation Cabinet will recei...1157090642bus/facility/othertraditionalRuralKY-001 ; KY-002 ; KY-003 ; KY-004 ; KY-0054Bus42Nonenot specifiednot specified
44MIMichigan Department of Transportation on behal...Transit Facility Repair and Expansion Project ...The Michigan Department of Transportation will...107000000facilityzero/traditionalRuralMI-004 ; MI-006 ; MI-0075Bus0Nonenot specifiednot specified
50TXBrazos Transit DistrictGetting to Zero - Brazos Transit District's Ze...The Brazos Transit District will receive fundi...965064611bus/chargerszeroSmall Urbantx-0106Bus11Nonenot specifiednot specified
55MECity of Bangor, Community ConnectorCold Bus Barn Rehabilitation for ElectrificationThe city of Bangor's Community Connector will ...78523200facilitytraditionalSmall Urbanme-0021Bus0Nonenot specifiednot specified
59MNCity of Rochester, MinnesotaDesign and Construction of Park-and-Ride Trans...The city of Rochester will receive funding to ...74400000facilitytraditionalSmall Urbanmn-0015Bus0Nonenot specifiednot specified
60UTUtah Department of Transportation on behalf of...Park City's Comprehensive Bus Stop Redesign an...The Utah Department of Transportation will rec...73931830facilityzero/traditionalRuralUT-0018Bus0Nonenot specifiednot specified
65MSCity of HattiesburgHub City Transit's Battery Electric Bus & Char...Hattiesburg's Hub City Transit will receive fu...64553257bus/chargerszeroSmall Urbanms-0044Low-No7Nonenot specifiednot specified
66OROregon Department of Transportation on behalf ...Hood River County Transportation District Zero...The Oregon Department of Transportation on beh...64248086bus/facility/chargerszeroRuralOR-00310Bus6Nonenot specifiednot specified
70OKOklahoma Department of Transportation on behal...Bus Facilities ExpansionThe Oklahoma Department of Transportation on b...60000000facilitytraditionalRuralok-0036Bus0Nonenot specifiednot specified
71NYNew York City Department of TransportationJerome Avenue Bus Stop ImprovementsThe New York City Department of Transportation...60000000facilityzero/low/traditionalLarge UrbanNY-013 ; NY-0152Bus0Nonenot specifiednot specified
72NMNew Mexico Department of Transportation on beh...Phase One Construction of NCRTD's Jim West Reg...The North Central Regional Transit District wi...59455530facilityzero/traditionalRuralNM-0036Bus0Nonenot specifiednot specified
73NYSeneca Nation of IndiansSeneca Nation Department of Transportation-All...The Seneca Nation Department of Transportation...58832000facilitytraditionalRuralny-0232Bus0Nonenot specifiednot specified
75TXPort Arthur Transit/City of Port ArthurPort Arthur Transit Zero-Emission Bus DeploymentPort Arthur Transit will receive funding to bu...50017004(zeroemissionelectric)bus/chargerszeroSmall Urbantx-0146Low-No4zero-emission bus (not specified)not specifiednot specified
76WASkagit Transit SystemSkagit Transit Maintenance Operations and Admi...The Skagit Transit System will receive funding...50000000facilityzeroSmall UrbanWA-00210Bus0Nonenot specifiednot specified
77TNNashville Metropolitan Transit AuthorityHickory Hollow Transit Center and Park & Ride ...The Nashville Metropolitan Transit Authority (...50000000facilitytraditionalLarge Urbantn-0054Bus0Nonenot specifiednot specified
80VAVirginia Department of Rail and Public Transpo...VA Rural Transit Asset Management and Equity P...The Virginia Department of Rail and Public Tra...469001039buslow/traditionalRuralVA-001 ; VA-002 ; VA-004 ; VA-005 ; VA-009 ; W...3Bus39Nonenot specifiednot specified
82OHGreater Dayton Regional Transit AuthorityGreater Dayton RTA, 600 Longworth Facility, Ro...The Greater Dayton Regional Transit Authority ...44929040facilityzero/traditionalLarge Urbanoh-0105Bus0Nonenot specifiednot specified
83OHWestern Reserve Transit AuthorityWestern Reserve Transit Authority\\nThe Western Reserve Transit Authority will rec...43135520facilitytraditionalLarge Urbanoh-0065Bus0Nonenot specifiednot specified
91NCNorth Carolina Department of Transportation on...ICPTA Transit Outpost and Propane Fueling Faci...The North Carolina Department of Transportatio...33260670facility/equipmentlowruralNC-001 ; NC-0034Low-No0Nonenot specifiednot specified
92WAWashington State Department of Transportation ...Procurement of vehicles and bus-facilities equ...The Washington State Department of Transportat...33036009(dieselandgas)bus/equipmenttraditionalRuralWA-004 ; WA-00610Bus9diesel and gasnot specifiednot specified
97CAState of California on behalf of Kern Regional...Purchase of Fifteen (15) Replacement Cutaway B...The State of California, on behalf of Kern Reg...293250015bustraditionalRuralCA-0209Bus15Nonecutawaynot specified
104NCTown of Chapel HillTown of Chapel Hill Bus Stop Accessibility and...The town of Chapel Hill will receive funding t...21600000facilitylow/traditionalLarge Urbannc-0044Bus0Nonenot specifiednot specified
108COThe Colorado Department of Transportation (CDO...ECO Transit Vehicle Storage Facility Electrifi...The Colorado Department of Transportation, on ...15066180facilityzero/traditionalRuralCO-002 ; CO-0038Bus0Nonenot specifiednot specified
110INFort Wayne Public Transportation Corporation (...Fort Wayne Citilink Facility Rehabilitations: ...Fort Wayne Public Transportation Corporation's...12800000facilitylow/traditionalLarge Urbanin-0035Bus0Nonenot specifiednot specified
112NCCity of High PointHigh Point Transit System (HPTS) Maintenance F...High Point Transit System will receive funding...12000000facilityotherSmall Urbannc-0064Bus0Nonenot specifiednot specified
115ILMadison County Mass Transit DistrictHeavy Duty 40-Foot Bus ReplacementThe Madison County Mass Transit District will ...10800002bustraditionalLarge UrbanIL-012 ; IL-013 ; IL-0155Bus2Nonenot specifiednot specified
116CACity of Norwalk - Norwalk Transit SystemNTS Bus Stop Equity ProjectThe city of Norwalk's Norwalk Transit System w...10553650facilityotherLarge UrbanCA-0389Bus0Nonenot specifiednot specified
118SDSouth Dakota Department of Transportation on b...Replacement of Aberdeen Ride Line fleet buses ...The South Dakota Department of Transportation,...10067509bustraditionalRuralSD-0018Bus9Nonenot specifiednot specified
122MNWhite Earth Reservation Business CommitteeWhite Earth Public Transit to replace 4 of the...White Earth Public Transit will receive fundin...7231714(propane)bus/equipmentlowRuralmn-0075Low-No4propanenot specifiednot specified
124MIMichigan Department of Transportation on behal...Transit vehicle replacement, facility expansio...The Michigan Department of Transportation, on ...5140022bus/facility/equipment/othertraditionalRuralMI-002 ; MI-0055Bus2Nonenot specifiednot specified
125SDSouth Dakota Department of Transportation on b...Construction of a new bus storage facility loc...The South Dakota Department of Transportation ...3200000facilitytraditionalRuralSD-0018Bus0Nonenot specifiednot specified
127NCNorth Carolina Department of Transportation on...Columbus County Transportation's Facility Expa...The North Carolina Department of Transportatio...2808000facilitytraditionalRuralnc-0074Bus0Nonenot specifiednot specified
128COThe Colorado Department of Transportation (CDO...SMART Vanpool Vehicle Replacement ProjectThe Colorado Department of Transportation (CDO...2337603bustraditionalRuralCO-0038Bus3Nonenot specifiednot specified
\n", - "
" - ], + } + ], + "source": [ + "list(df['prop_type'].sort_values().unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "9bd26f13-146d-43cc-9a23-23032ac2b173", + "metadata": {}, + "outputs": [], + "source": [ + "prop_type_dict ={'15 electic, 16 hybrid': 'mix (zero and low emission buses)',\n", + " '1:CNGbus ;2 cutaway CNG buses': 'mix (zero and low emission buses)',\n", + " '2 BEBs and 4 hydrogen fuel cell buses': 'mix (BEB and FCEB)',\n", + " '4 fuel cell / 3 CNG': 'mix (zero and low emission buses)',\n", + " 'BEBs paratransit buses': \"BEB\",\n", + "\"CNG buses\": \"CNG\",\n", + " \"CNG fueled\": \"CNG\",\n", + " \"Electric\": \"electric (not specified)\",\n", + " \"battery electric\": \"BEB\",\n", + " 'diesel and gas': 'mix (low emission)',\n", + " 'diesel electric hybrids':'low emission (hybrid)',\n", + " 'diesel-electric':'low emission (hybrid)',\n", + " 'diesel-electric hybrids':'low emission (hybrid)',\n", + " 'electric':\"electric (not specified)\",\n", + " 'estimated-CNG buses': \"CNG\",\n", + " 'estimated-cutaway vans (PM- award will not fund 68 buses': 'not specified',\n", + " 'fuel cell': 'FCEB',\n", + " 'fuel cell electric': 'FCEB',\n", + " 'hybrid':'low emission (hybrid)',\n", + " 'hybrid electric':'low emission (hybrid)',\n", + " 'hybrid electric buses':'low emission (hybrid)',\n", + " 'hybrid electrics':'low emission (hybrid)',\n", + " 'hydrogen fuel cell': 'FCEB',\n", + " 'low emission CNG': 'CNG',\n", + " 'propane':'low emission (propane)',\n", + " 'propane buses':'low emission (propane)',\n", + " 'propaned powered vehicles':'low emission (propane)',\n", + " 'zero emission':\"zero-emission bus (not specified)\",\n", + " 'zero emission buses':\"zero-emission bus (not specified)\",\n", + " 'zero emission electric':\"zero-emission bus (not specified)\",\n", + " 'zero-emission':\"zero-emission bus (not specified)\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": 
"90e68ec0-2da5-4d7d-8804-c6e7f2fb97a6", + "metadata": {}, + "outputs": [], + "source": [ + "# repalcing values in prop type with prop type dictionary\n", + "df.replace({\"prop_type\": prop_type_dict}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "4af08b06-7b8a-4411-9243-4bdf0b6ff87a", + "metadata": {}, + "outputs": [ + { + "data": { "text/plain": [ - " state project_sponsor \\\n", - "9 OH METRO Regional Transit Authority \n", - "15 CA North County Transit District (NCTD) \n", - "22 IA City of Iowa City \n", - "24 GA Georgia State University \n", - "35 FL City of Ocala \n", - "36 SC South Carolina Department of Transportation on... \n", - "39 IL Illinois Department of Transportation on behal... \n", - "42 KY Kentucky Transportation Cabinet on behalf of 1... \n", - "44 MI Michigan Department of Transportation on behal... \n", - "50 TX Brazos Transit District \n", - "55 ME City of Bangor, Community Connector \n", - "59 MN City of Rochester, Minnesota \n", - "60 UT Utah Department of Transportation on behalf of... \n", - "65 MS City of Hattiesburg \n", - "66 OR Oregon Department of Transportation on behalf ... \n", - "70 OK Oklahoma Department of Transportation on behal... \n", - "71 NY New York City Department of Transportation \n", - "72 NM New Mexico Department of Transportation on beh... \n", - "73 NY Seneca Nation of Indians \n", - "75 TX Port Arthur Transit/City of Port Arthur \n", - "76 WA Skagit Transit System \n", - "77 TN Nashville Metropolitan Transit Authority \n", - "80 VA Virginia Department of Rail and Public Transpo... \n", - "82 OH Greater Dayton Regional Transit Authority \n", - "83 OH Western Reserve Transit Authority \n", - "91 NC North Carolina Department of Transportation on... \n", - "92 WA Washington State Department of Transportation ... \n", - "97 CA State of California on behalf of Kern Regional... \n", - "104 NC Town of Chapel Hill \n", - "108 CO The Colorado Department of Transportation (CDO... 
\n", - "110 IN Fort Wayne Public Transportation Corporation (... \n", - "112 NC City of High Point \n", - "115 IL Madison County Mass Transit District \n", - "116 CA City of Norwalk - Norwalk Transit System \n", - "118 SD South Dakota Department of Transportation on b... \n", - "122 MN White Earth Reservation Business Committee \n", - "124 MI Michigan Department of Transportation on behal... \n", - "125 SD South Dakota Department of Transportation on b... \n", - "127 NC North Carolina Department of Transportation on... \n", - "128 CO The Colorado Department of Transportation (CDO... \n", - "\n", - " project_title \\\n", - "9 Akron METRO RTA Maintenance and Operations Fac... \n", - "15 Accelerate Clean Transit (ACT) \n", - "22 Iowa City Zero-Emission Transit Operations Mai... \n", - "24 College Town, Downtown: Transitioning to an Al... \n", - "35 Electric Bus Vehicle Purchase and Expansion of... \n", - "36 SCDOT Vehicle Replacement Project \n", - "39 Illinois DOT Statewide Paratransit Vehicle Rep... \n", - "42 Consolidated Proposal for 10 Transit Agencies ... \n", - "44 Transit Facility Repair and Expansion Project ... \n", - "50 Getting to Zero - Brazos Transit District's Ze... \n", - "55 Cold Bus Barn Rehabilitation for Electrification \n", - "59 Design and Construction of Park-and-Ride Trans... \n", - "60 Park City's Comprehensive Bus Stop Redesign an... \n", - "65 Hub City Transit's Battery Electric Bus & Char... \n", - "66 Hood River County Transportation District Zero... \n", - "70 Bus Facilities Expansion \n", - "71 Jerome Avenue Bus Stop Improvements \n", - "72 Phase One Construction of NCRTD's Jim West Reg... \n", - "73 Seneca Nation Department of Transportation-All... \n", - "75 Port Arthur Transit Zero-Emission Bus Deployment \n", - "76 Skagit Transit Maintenance Operations and Admi... \n", - "77 Hickory Hollow Transit Center and Park & Ride ... \n", - "80 VA Rural Transit Asset Management and Equity P... 
\n", - "82 Greater Dayton RTA, 600 Longworth Facility, Ro... \n", - "83 Western Reserve Transit Authority\\n \n", - "91 ICPTA Transit Outpost and Propane Fueling Faci... \n", - "92 Procurement of vehicles and bus-facilities equ... \n", - "97 Purchase of Fifteen (15) Replacement Cutaway B... \n", - "104 Town of Chapel Hill Bus Stop Accessibility and... \n", - "108 ECO Transit Vehicle Storage Facility Electrifi... \n", - "110 Fort Wayne Citilink Facility Rehabilitations: ... \n", - "112 High Point Transit System (HPTS) Maintenance F... \n", - "115 Heavy Duty 40-Foot Bus Replacement \n", - "116 NTS Bus Stop Equity Project \n", - "118 Replacement of Aberdeen Ride Line fleet buses ... \n", - "122 White Earth Public Transit to replace 4 of the... \n", - "124 Transit vehicle replacement, facility expansio... \n", - "125 Construction of a new bus storage facility loc... \n", - "127 Columbus County Transportation's Facility Expa... \n", - "128 SMART Vanpool Vehicle Replacement Project \n", - "\n", - " description funding \\\n", - "9 The METRO Regional Transit Authority will rece... 37808113 \n", - "15 The North County Transit District will receive... 29330243 \n", - "22 Iowa City will receive funding to buy electric... 23280546 \n", - "24 Georgia State University's Panther Express wil... 22286745 \n", - "35 The city of Ocala's SunTran transit system wil... 16166822 \n", - "36 The South Carolina Department of Transportatio... 15423904 \n", - "39 The Illinois Department of Transportation will... 12600000 \n", - "42 The Kentucky Transportation Cabinet will recei... 11570906 \n", - "44 The Michigan Department of Transportation will... 10700000 \n", - "50 The Brazos Transit District will receive fundi... 9650646 \n", - "55 The city of Bangor's Community Connector will ... 7852320 \n", - "59 The city of Rochester will receive funding to ... 7440000 \n", - "60 The Utah Department of Transportation will rec... 7393183 \n", - "65 Hattiesburg's Hub City Transit will receive fu... 
6455325 \n", - "66 The Oregon Department of Transportation on beh... 6424808 \n", - "70 The Oklahoma Department of Transportation on b... 6000000 \n", - "71 The New York City Department of Transportation... 6000000 \n", - "72 The North Central Regional Transit District wi... 5945553 \n", - "73 The Seneca Nation Department of Transportation... 5883200 \n", - "75 Port Arthur Transit will receive funding to bu... 5001700 \n", - "76 The Skagit Transit System will receive funding... 5000000 \n", - "77 The Nashville Metropolitan Transit Authority (... 5000000 \n", - "80 The Virginia Department of Rail and Public Tra... 4690010 \n", - "82 The Greater Dayton Regional Transit Authority ... 4492904 \n", - "83 The Western Reserve Transit Authority will rec... 4313552 \n", - "91 The North Carolina Department of Transportatio... 3326067 \n", - "92 The Washington State Department of Transportat... 3303600 \n", - "97 The State of California, on behalf of Kern Reg... 2932500 \n", - "104 The town of Chapel Hill will receive funding t... 2160000 \n", - "108 The Colorado Department of Transportation, on ... 1506618 \n", - "110 Fort Wayne Public Transportation Corporation's... 1280000 \n", - "112 High Point Transit System will receive funding... 1200000 \n", - "115 The Madison County Mass Transit District will ... 1080000 \n", - "116 The city of Norwalk's Norwalk Transit System w... 1055365 \n", - "118 The South Dakota Department of Transportation,... 1006750 \n", - "122 White Earth Public Transit will receive fundin... 723171 \n", - "124 The Michigan Department of Transportation, on ... 514002 \n", - "125 The South Dakota Department of Transportation ... 320000 \n", - "127 The North Carolina Department of Transportatio... 280800 \n", - "128 The Colorado Department of Transportation (CDO... 
233760 \n", - "\n", - " #_of_buses project_type \\\n", - "9 0 facility \n", - "15 23(FCEB) bus \n", - "22 4(BEBs) bus/facility/chargers \n", - "24 18 bus/facility/chargers \n", - "35 31 bus/facility/chargers/equipment \n", - "36 160 bus \n", - "39 134 bus \n", - "42 42 bus/facility/other \n", - "44 0 facility \n", - "50 11 bus/chargers \n", - "55 0 facility \n", - "59 0 facility \n", - "60 0 facility \n", - "65 7 bus/chargers \n", - "66 6 bus/facility/chargers \n", - "70 0 facility \n", - "71 0 facility \n", - "72 0 facility \n", - "73 0 facility \n", - "75 4(zeroemissionelectric) bus/chargers \n", - "76 0 facility \n", - "77 0 facility \n", - "80 39 bus \n", - "82 0 facility \n", - "83 0 facility \n", - "91 0 facility/equipment \n", - "92 9(dieselandgas) bus/equipment \n", - "97 15 bus \n", - "104 0 facility \n", - "108 0 facility \n", - "110 0 facility \n", - "112 0 facility \n", - "115 2 bus \n", - "116 0 facility \n", - "118 9 bus \n", - "122 4(propane) bus/equipment \n", - "124 2 bus/facility/equipment/other \n", - "125 0 facility \n", - "127 0 facility \n", - "128 3 bus \n", - "\n", - " propulsion_type area_served \\\n", - "9 low Large Urban \n", - "15 zero Large Urban \n", - "22 zero Small Urban \n", - "24 zero Large Urban \n", - "35 zero Small Urban \n", - "36 traditional Rural \n", - "39 traditional statewide \n", - "42 traditional Rural \n", - "44 zero/traditional Rural \n", - "50 zero Small Urban \n", - "55 traditional Small Urban \n", - "59 traditional Small Urban \n", - "60 zero/traditional Rural \n", - "65 zero Small Urban \n", - "66 zero Rural \n", - "70 traditional Rural \n", - "71 zero/low/traditional Large Urban \n", - "72 zero/traditional Rural \n", - "73 traditional Rural \n", - "75 zero Small Urban \n", - "76 zero Small Urban \n", - "77 traditional Large Urban \n", - "80 low/traditional Rural \n", - "82 zero/traditional Large Urban \n", - "83 traditional Large Urban \n", - "91 low rural \n", - "92 traditional Rural \n", - "97 traditional Rural 
\n", - "104 low/traditional Large Urban \n", - "108 zero/traditional Rural \n", - "110 low/traditional Large Urban \n", - "112 other Small Urban \n", - "115 traditional Large Urban \n", - "116 other Large Urban \n", - "118 traditional Rural \n", - "122 low Rural \n", - "124 traditional Rural \n", - "125 traditional Rural \n", - "127 traditional Rural \n", - "128 traditional Rural \n", - "\n", - " congressional_districts fta_region \\\n", - "9 OH-013 5 \n", - "15 CA-049 ; CA-050 9 \n", - "22 ia-001 7 \n", - "24 GA-005 4 \n", - "35 FL-003 ; FL-006 4 \n", - "36 SC-All ; SC-001 ; SC-002 ; SC-003 ; SC-004 ; S... 4 \n", - "39 IL-002 ; IL-011 ; IL-012 ; IL-013 ; IL-014 ; I... 5 \n", - "42 KY-001 ; KY-002 ; KY-003 ; KY-004 ; KY-005 4 \n", - "44 MI-004 ; MI-006 ; MI-007 5 \n", - "50 tx-010 6 \n", - "55 me-002 1 \n", - "59 mn-001 5 \n", - "60 UT-001 8 \n", - "65 ms-004 4 \n", - "66 OR-003 10 \n", - "70 ok-003 6 \n", - "71 NY-013 ; NY-015 2 \n", - "72 NM-003 6 \n", - "73 ny-023 2 \n", - "75 tx-014 6 \n", - "76 WA-002 10 \n", - "77 tn-005 4 \n", - "80 VA-001 ; VA-002 ; VA-004 ; VA-005 ; VA-009 ; W... 
3 \n", - "82 oh-010 5 \n", - "83 oh-006 5 \n", - "91 NC-001 ; NC-003 4 \n", - "92 WA-004 ; WA-006 10 \n", - "97 CA-020 9 \n", - "104 nc-004 4 \n", - "108 CO-002 ; CO-003 8 \n", - "110 in-003 5 \n", - "112 nc-006 4 \n", - "115 IL-012 ; IL-013 ; IL-015 5 \n", - "116 CA-038 9 \n", - "118 SD-001 8 \n", - "122 mn-007 5 \n", - "124 MI-002 ; MI-005 5 \n", - "125 SD-001 8 \n", - "127 nc-007 4 \n", - "128 CO-003 8 \n", - "\n", - " bus/low-no_program bus_count bus_type \\\n", - "9 Bus 0 None \n", - "15 Low-No 23 FCEB \n", - "22 Low-No 4 BEB \n", - "24 Low-No 18 None \n", - "35 Low-No 31 None \n", - "36 Bus 160 None \n", - "39 Bus 134 None \n", - "42 Bus 42 None \n", - "44 Bus 0 None \n", - "50 Bus 11 None \n", - "55 Bus 0 None \n", - "59 Bus 0 None \n", - "60 Bus 0 None \n", - "65 Low-No 7 None \n", - "66 Bus 6 None \n", - "70 Bus 0 None \n", - "71 Bus 0 None \n", - "72 Bus 0 None \n", - "73 Bus 0 None \n", - "75 Low-No 4 zero-emission bus (not specified) \n", - "76 Bus 0 None \n", - "77 Bus 0 None \n", - "80 Bus 39 None \n", - "82 Bus 0 None \n", - "83 Bus 0 None \n", - "91 Low-No 0 None \n", - "92 Bus 9 diesel and gas \n", - "97 Bus 15 None \n", - "104 Bus 0 None \n", - "108 Bus 0 None \n", - "110 Bus 0 None \n", - "112 Bus 0 None \n", - "115 Bus 2 None \n", - "116 Bus 0 None \n", - "118 Bus 9 None \n", - "122 Low-No 4 propane \n", - "124 Bus 2 None \n", - "125 Bus 0 None \n", - "127 Bus 0 None \n", - "128 Bus 3 None \n", - "\n", - " bus_size_type extracted_propulsion_type \n", - "9 not specified not specified \n", - "15 not specified not specified \n", - "22 not specified not specified \n", - "24 not specified not specified \n", - "35 cutaway not specified \n", - "36 not specified not specified \n", - "39 cutaway not specified \n", - "42 not specified not specified \n", - "44 not specified not specified \n", - "50 not specified not specified \n", - "55 not specified not specified \n", - "59 not specified not specified \n", - "60 not specified not specified \n", - "65 not 
specified not specified \n", - "66 not specified not specified \n", - "70 not specified not specified \n", - "71 not specified not specified \n", - "72 not specified not specified \n", - "73 not specified not specified \n", - "75 not specified not specified \n", - "76 not specified not specified \n", - "77 not specified not specified \n", - "80 not specified not specified \n", - "82 not specified not specified \n", - "83 not specified not specified \n", - "91 not specified not specified \n", - "92 not specified not specified \n", - "97 cutaway not specified \n", - "104 not specified not specified \n", - "108 not specified not specified \n", - "110 not specified not specified \n", - "112 not specified not specified \n", - "115 not specified not specified \n", - "116 not specified not specified \n", - "118 not specified not specified \n", - "122 not specified not specified \n", - "124 not specified not specified \n", - "125 not specified not specified \n", - "127 not specified not specified \n", - "128 not specified not specified " + "needs manual check 58\n", + "BEB 18\n", + "low emission (hybrid) 15\n", + "CNG 14\n", + "electric (not specified) 6\n", + "low emission (propane) 5\n", + "zero-emission bus (not specified) 4\n", + "FCEB 4\n", + "mix (zero and low emission buses) 3\n", + "mix (BEB and FCEB) 1\n", + "not specified 1\n", + "mix (low emission) 1\n", + "Name: prop_type, dtype: int64" ] }, - "execution_count": 117, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[df['extracted_propulsion_type']=='not specified']" + "#check work\n", + "df.prop_type.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "dc3aacbb-0dcb-4bb0-9ea7-6c2fe1d75b49", + "metadata": {}, + "source": [ + "### fix `prop_type == needs manual check`\n", + "\n", + "- subset a df of only prop type == needs manual check\n", + "- create list of keywords to check prop type\n", + "- create function to replace `needs manualc check` values with 
list values\n", + "- then... do something with both dataframes? \n", + " * remove rows with `needs manual check`\n", + " * then append subset df to initial df?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "580e9c87-9aae-4221-bc29-251e4e1469be", + "metadata": {}, + "outputs": [], + "source": [ + "manual_check = df[df['prop_type'] == 'needs manual check']" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "970c595c-28a0-4e6c-b0b2-2065d75971ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(58, 14)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "needs manual check 58\n", + "Name: prop_type, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Index(['state', 'project_sponsor', 'project_title', 'description', 'funding',\n", + " 'approx_#_of_buses', 'project_type', 'propulsion_category',\n", + " 'area_served', 'congressional_districts', 'fta_region',\n", + " 'bus/low-no_program', 'bus_count', 'prop_type'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(manual_check.shape, manual_check['prop_type'].value_counts(), manual_check.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "3e66c103-bec0-4925-9a71-b446f6931c33", + "metadata": {}, + "outputs": [], + "source": [ + "manual_checker_list = [\n", + " 'County Mass Transit District will receive funding to buy buses',\n", + " 'Colorado will receive funding to buy vans to replace older ones',\n", + " 'ethanol-fueled buses',\n", + " ' will receive funding to buy vans to replace',\n", + " 'funding to replace the oldest buses',\n", + " 'to buy buses and charging equipment',\n", + " 'counties by buying buses',\n", + " 'receive funding to buy cutaway paratransit buses',\n", + " 'new replacement vehicles',\n", + " 'propane-powered',\n", + " 'hybrid 
diesel-electric buses',\n", + " 'propane fueled buses',\n", + " 'cutaway vehicles',\n", + " 'diesel-electric hybrid',\n", + " \"low or no emission buses\",\n", + " \"electric buses\",\n", + " 'hybrid-electric vehicles',\n", + " \"electric commuter\",\n", + " \"Electric Buses\",\n", + " \"battery electric\",\n", + " \"Batery Electric\",\n", + " \"battery-electric\",\n", + " \"fuel-cell\",\n", + " \"fuel cell\",\n", + " \"Fuel Cell\",\n", + " \"zero emission\",\n", + " \"Zero Emission\",\n", + " \"zero-emission electric buses\",\n", + " \"zero-emission buses\",\n", + " \"zero‐emission\",\n", + " \"zero-emission\",\n", + " \"zeroemission\",\n", + " \"CNG\",\n", + " \"cng\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "9ea7203e-c44f-4b02-a6da-31e7dea885c9", + "metadata": {}, + "outputs": [], + "source": [ + "# function to match keywords to list\n", + "def prop_type_finder(description):\n", + " for keyword in manual_checker_list:\n", + " if keyword in description:\n", + " return keyword\n", + " return \"not specified\"" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "83946e9b-029f-4b4c-941f-921c10e09f0f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_762/389505494.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " manual_check[\"prop_type\"] = manual_check[\"description\"].apply(prop_type_finder)\n" + ] + } + ], + "source": [ + "manual_check[\"prop_type\"] = manual_check[\"description\"].apply(prop_type_finder)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "06736c33-60e9-41ef-a644-c4eacd28c397", + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "(58, 14)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "not specified 21\n", + "electric buses 11\n", + "zero-emission 5\n", + "zero emission 2\n", + "battery-electric 2\n", + "diesel-electric hybrid 1\n", + "Colorado will receive funding to buy vans to replace older ones 1\n", + " will receive funding to buy vans to replace 1\n", + "ethanol-fueled buses 1\n", + "County Mass Transit District will receive funding to buy buses 1\n", + "propane fueled buses 1\n", + "cutaway vehicles 1\n", + "hybrid diesel-electric buses 1\n", + "propane-powered 1\n", + "funding to replace the oldest buses 1\n", + "to buy buses and charging equipment 1\n", + "counties by buying buses 1\n", + "receive funding to buy cutaway paratransit buses 1\n", + "new replacement vehicles 1\n", + "zero-emission buses 1\n", + "low or no emission buses 1\n", + "hybrid-electric vehicles 1\n", + "Name: prop_type, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Index(['state', 'project_sponsor', 'project_title', 'description', 'funding',\n", + " 'approx_#_of_buses', 'project_type', 'propulsion_category',\n", + " 'area_served', 'congressional_districts', 'fta_region',\n", + " 'bus/low-no_program', 'bus_count', 'prop_type'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(manual_check.shape, manual_check['prop_type'].value_counts(), manual_check.columns)" + ] + }, + { + "cell_type": "markdown", + "id": "8a498a0b-4552-4c38-90cd-6e1c4045d40d", + "metadata": { + "tags": [] + }, + "source": [ + "### Need new column for `bus size type` via list and function\n", + "cutaway, 40ft etc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d24e6642-358c-4f2a-8139-7fd941faf5e1", + "metadata": {}, + "outputs": [], + "source": [ + "list(df.columns)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "6053159a-62b1-4866-a13e-47fabf4576b4", + "metadata": {}, + "outputs": [], + "source": [ + "bus_size = [\n", + " \"standard\",\n", + " \"40 foot\",\n", + " \"40-foot\",\n", + " \"40ft\",\n", + " \"articulated\",\n", + " \"cutaway\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3500afae-6358-435f-9d25-1854b9f16634", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to match keywords\n", + "def find_bus_size_type(description):\n", + " for keyword in bus_size:\n", + " if keyword in description.lower():\n", + " return keyword\n", + " return \"not specified\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7a0ae00-3a85-4e95-ae94-7ffeb55c3a8d", + "metadata": {}, + "outputs": [], + "source": [ + "# new column called bus size type based on description column\n", + "df[\"bus_size_type\"] = df[\"description\"].apply(find_bus_size_type)" ] }, { @@ -3277,7 +1191,22 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": null, + "id": "e0c6c4a2-4749-4f6b-ad14-d452f1e201b8", + "metadata": {}, + "outputs": [], + "source": [ + "#check work\n", + "display(\n", + " df.head(3),\n", + " df.bus_size_type.unique(),\n", + " df.shape\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "609a3659-8bf2-4412-aabc-d6ea956c3bbe", "metadata": {}, "outputs": [], @@ -3313,45 +1242,56 @@ { "cell_type": "code", "execution_count": null, - "id": "8a4e9841-79f5-41e1-9c53-3def3d802bd9", + "id": "7a2c32a7-4cd0-4755-8a04-2db5638cdcbb", "metadata": {}, "outputs": [], "source": [ - "# confirming cleaned data shows as expected.\n", - "display(bus_cost.shape, type(bus_cost), bus_cost.columns)" + "# drop unnessary columns\n", + "bus_cost = bus_cost.drop([\"Unnamed: 0\", \"congressional_districts\"], axis=1)" ] }, { "cell_type": "code", "execution_count": null, - "id": "7a2c32a7-4cd0-4755-8a04-2db5638cdcbb", + "id": "8a4e9841-79f5-41e1-9c53-3def3d802bd9", 
"metadata": {}, "outputs": [], "source": [ - "# drop unnessary columns\n", - "bus_cost = bus_cost.drop([\"Unnamed: 0\", \"congressional_districts\"], axis=1)" + "# confirming cleaned data shows as expected.\n", + "display(bus_cost.shape, type(bus_cost), bus_cost.columns)" ] }, { "cell_type": "code", "execution_count": null, - "id": "58a92ab0-8f13-4462-989a-e1c9105ff414", + "id": "f95f7ae2-2ddf-40b7-aa76-af00e83854d1", "metadata": {}, "outputs": [], "source": [ - "# confirming columns dropped as intended.\n", - "# less columns(14 to 12)\n", - "display(bus_cost.shape, bus_cost.columns)" + "bus_cost['prop_type'].sort_values(ascending=True).unique()" + ] + }, + { + "cell_type": "markdown", + "id": "1ede25fd-850a-4be6-bac0-a1bffa05b776", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## DEPRECATED - Data Analysis\n", + "actual data analysis and summary stats exist in the `cost_per_bus_analysis.ipynb` notebook" ] }, { "cell_type": "markdown", "id": "42ca0dfd-2ea1-4194-b431-0e4853d21879", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## Cost per Bus, per Transit Agency dataframe" + "### Cost per Bus, per Transit Agency dataframe" ] }, { @@ -3431,7 +1371,7 @@ "tags": [] }, "source": [ - "## Cost per bus, stats analysis" + "### Cost per bus, stats analysis" ] }, { @@ -3464,14 +1404,13 @@ "tags": [] }, "source": [ - "## Initial Summary Stats" + "### Initial Summary Stats" ] }, { "cell_type": "markdown", "id": "c4f9c488-8306-4eb3-bac0-40c75ac1dfed", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -3535,7 +1474,6 @@ "cell_type": "markdown", "id": "e85e540e-0396-49b0-9f2e-64e5236e63e8", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -3622,7 +1560,6 @@ "cell_type": "markdown", "id": "211cb7b6-8fb1-4d52-890a-7106afb981a0", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -3653,11 +1590,10 @@ 
"cell_type": "markdown", "id": "ac02fbbb-2a88-486f-8001-fd8156c50bfb", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## Overall Summary" + "### Overall Summary" ] }, { diff --git a/bus_procurement_cost/README.md b/bus_procurement_cost/README.md index 01d8e9f1a..637a02924 100644 --- a/bus_procurement_cost/README.md +++ b/bus_procurement_cost/README.md @@ -3,8 +3,8 @@ ## Datasets * FTA Bus and Low- and No-Emission Grant Awards * TIRCP Project List -* DGS Usage Reports (via Rebel) -* Washington and Georgia Contract list (via Rebel) +* (upcoming) DGS Usage Reports (via Rebel) +* (upcoming) Washington and/or Georgia Contract list (via Rebel) ## GH issue Research Request - Bus Procurement Costs & Awards #897 @@ -19,5 +19,15 @@ Identify federal awards to fund bus purchases and how much agencies pay for them * count of buses * propulsion type of buses (zero/non-zero emission, BEB, FCEB, CNG etc) * bus type (standard, cutaway, articulated etc) -- Combine datasets together, aggregate up by transit agency, calculate a "cost_per_bus" column. -- Anylyze cost per bus for the different bus categories \ No newline at end of file +

+- Combine datasets together, aggregate up by transit agency, calculate a "cost_per_bus" (cpb) column. +- Aggregate cpb by: + * transit agency + * propulsion type + * bus size type +

+ - Visualize aggregations on charts + - Calculate summary stats on cpb + * calculate mean, standard deviation + * calculate z-score. remove outliers + * plot distribution \ No newline at end of file diff --git a/bus_procurement_cost/code_review.md b/bus_procurement_cost/code_review.md new file mode 100644 index 000000000..07e21fbb6 --- /dev/null +++ b/bus_procurement_cost/code_review.md @@ -0,0 +1,75 @@ +# Code Review +[Google Drive notes](https://docs.google.com/document/d/12SLubjQoE8NLLOm8Nb5DYKzwdNS1WYOkvrK_HnR1-FY/) + +### README for methodology is good +* The methodology reads like a narrative, code can be the same! +* Use functions to break your code up into discrete steps. +``` +def clean_funding(df: pd.DataFrame): + # dollar sign stuff + # maybe rounding + # clean up missing values + return + +def clean_other_columns(df): + return + + +def data_cleaning_tircp(): + df = pd.read_csv() + df1 = clean_funding(df) + df2 = clean_other_columns(df1) + + return df2 +``` + + +### Use `pandas` over `numpy` for now +* Generate a lot of your columns at once +* You can do group stats more easily with `pandas` +* With `numpy`, it's a little harder to include groupings in the stats you want +``` +df2 = df.assign( + # grouping over a column or list of columns + # use transform() to get what you want + group_mean = (df.groupby("shape_array_key") + .service_hours + .transform("mean") + ), + group_std = (df.groupby("shape_array_key") + .service_hours + .transform("std") + ) +) + +df2 = df2.assign( + z_score = (df2.service_hours - df2.group_mean) / df2.group_std +) +``` + + +### Use a function to make a chart +* find what's in common -> can include those within the function +* what's not in common -> set as variable +``` +def make_bar_chart(concat, y_col, chart_title): + axis_labeling_dict = { + "bus_count": "# of buses" + "project_sponsor": "Transit Agencies" + } + + #bar chart of highest bus count + concat = (concat.sort_values(by=y_col, ascending=False) + .head(10) + 
.plot(x='project_sponsor', + y=y_col, kind='bar', color='skyblue') + ) + plt.title(chart_title) + plt.xlabel(axis_labeling_dict["project_sponsor"]) + plt.ylabel(axis_labeling_dict[y_col]) + # return plt? + + +c1 = make_bar_chart(concat, y_col = "bus_funds", chart_title = "First Title") +c2 = make_bar_chart(concat, y_col = "bus_count", chart_title = "Second Title") +``` \ No newline at end of file diff --git a/bus_procurement_cost/cost_per_bus_analysis.ipynb b/bus_procurement_cost/cost_per_bus_analysis.ipynb index f72e1dacc..316f2d983 100644 --- a/bus_procurement_cost/cost_per_bus_analysis.ipynb +++ b/bus_procurement_cost/cost_per_bus_analysis.ipynb @@ -2,17 +2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 78, + "execution_count": 1, "id": "8b95db91-abf1-4357-884f-e11d73777da5", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", + "import numpy as np\n", "import pandas as pd\n", + "import seaborn as sns\n", "import shared_utils\n", "from scipy.stats import zscore\n", - "import seaborn as sns\n", - "import numpy as np\n", "\n", "# set_option to increase max rows displayed to 200, to see entire df in 1 go/\n", "pd.set_option(\"display.max_rows\", 200)" @@ -36,8 +36,10 @@ "metadata": {}, "outputs": [], "source": [ - "# FTA Grant Award Press Release Data\n", - "fta = pd.read_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_cost_per_bus.csv')" + "# cleaned FTA Grant Award Press Release Data for buses only\n", + "fta = pd.read_csv(\n", + " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_bus_cost_clean.csv\"\n", + ")" ] }, { @@ -47,19 +49,62 @@ "metadata": {}, "outputs": [], "source": [ - "# TIRCP Project Tracking Data \n", - "tircp = pd.read_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_cost_per_bus.csv')" + "# cleaned TIRCP Project Tracking Data project tracking sheet\n", + "tircp = pd.read_csv(\n", + " 
\"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_bus_only.csv\"\n", + ")" ] }, { "cell_type": "markdown", - "id": "6950af52-f9eb-440b-9801-0396b650d502", + "id": "3e019b4d-2445-4d7c-a2a9-0cb2c5953a44", + "metadata": {}, + "source": [ + "## Game Plan\n", + "- bring in both data sets (FTA Press Release and TIRCP bus data)\n", + "- FTA data, make sure it only has rows with bus count > 0\n", + "- may need to clean up the prop type and bus size type if there are any similar categories (completed at FTA notebook)\n", + "- Create shortened data frames for each. include the following columns:\n", + " 1. agency name (project_sponsor & grant_recipient)\n", + " 2. project title? (project_title)\n", + " 3. project award amount (funding and tircp_award_amount($))\n", + " 4. bus count (bus_count)\n", + " 5. propulsion type (prop_type)\n", + " 6. bus size type (bus_size_type)\n", + "
\n", + "
\n", + "- concat the short dataframes\n", + "- start aggregation. sum/count bus count, funding and project #\n", + " * agg by agency name\n", + " * agg by prop type\n", + " * agg by bus size type\n", + "\n", + "- new column for Z-score of `cost_per_bus`\n", + "- rerun stats sumamry\n", + "- use functions to make charts" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "85f4a371-32d5-4497-9aaf-dd2361b913e0", + "metadata": {}, + "outputs": [], + "source": [ + "# function to display df info\n", + "def df_peek(df):\n", + " display(type(df), df.shape, df.dtypes, df.sample(2))" + ] + }, + { + "cell_type": "markdown", + "id": "82def04b-551f-4480-a18e-3e121308bbf5", "metadata": { "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## Dataset cleaning" + "## Read in FTA and TIRCP Data" ] }, { @@ -70,13 +115,31 @@ "outputs": [], "source": [ "# peaking into each dataset\n", - "data=[fta, tircp]\n", + "data = [fta, tircp]\n", "\n", "for x in data:\n", - " display(x.shape,\n", - " x.columns,\n", - " x.dtypes,\n", - " x.head())" + " display(x.shape, x.columns, x.head(), x.dtypes)" + ] + }, + { + "cell_type": "markdown", + "id": "6950af52-f9eb-440b-9801-0396b650d502", + "metadata": { + "tags": [] + }, + "source": [ + "## Dataset cleaning" + ] + }, + { + "cell_type": "markdown", + "id": "d2588e43-41b8-4886-a298-416cf4a4c83d", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "### drop some columns" ] }, { @@ -86,11 +149,8 @@ "metadata": {}, "outputs": [], "source": [ - "#look to drop column from each df\n", - "data=[fta, tircp]\n", - "\n", - "for x in data:\n", - " x.drop('Unnamed: 0', axis=1, inplace=True)" + "# examine columns\n", + "display(fta.columns, tircp.columns)" ] }, { @@ -100,407 +160,408 @@ "metadata": {}, "outputs": [], "source": [ - "display(fta.columns,\n", - "tircp.columns)" + "fta = fta.drop(\n", + " columns=[\n", + " \"Unnamed: 0\",\n", + " \"area_served\",\n", + " 
\"congressional_districts\",\n", + " \"fta_region\",\n", + " \"bus/low-no_program\",\n", + " ]\n", + ")" ] }, { - "cell_type": "markdown", - "id": "db7c3d45-d0b6-469a-b299-ccfb4efdbce1", + "cell_type": "code", + "execution_count": null, + "id": "40ce35ff-38bf-4001-b25f-37878eab700c", "metadata": {}, + "outputs": [], "source": [ - "### reorder columns" + "tircp = tircp.drop(\n", + " columns=[\n", + " \"Unnamed: 0.2\",\n", + " \"Unnamed: 0.1\",\n", + " \"Unnamed: 0\",\n", + " \"award_year\",\n", + " \"district\",\n", + " \"county\",\n", + " \"total_project_cost\",\n", + " ]\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "f55f8473-9af6-4c2d-8d8f-0698cde0c265", + "id": "17a26be1-15ba-40a0-9d09-0218d79443f4", "metadata": {}, "outputs": [], "source": [ - "new_order =['grant_recipient',\n", - " 'tircp_award_amount_($)',\n", - " 'bus_count',\n", - " 'cost_per_bus']\n", - "tircp = tircp[new_order]" + "display(fta.columns, tircp.columns)" ] }, { "cell_type": "markdown", - "id": "cdfd4d75-59d3-482c-97bd-349a11abf105", - "metadata": {}, - "source": [ - "### rename columns to match eachother" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "95c0112f-78cb-4cb5-aa09-d75b124cdbec", - "metadata": {}, - "outputs": [], + "id": "6b1e029f-5da7-417a-bc7f-94690bfdc25b", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "new_col =['project_sponsor', 'funding', 'bus_count', 'cost_per_bus']\n", - "\n", - "tircp.columns=new_col" + "### drop zero bus counts from FTA" ] }, { "cell_type": "code", "execution_count": null, - "id": "dfaf1766-5a70-4ec2-a54b-0e16f9fca8bc", + "id": "4553c5aa-95be-488e-b870-ddc3f36fa321", "metadata": {}, "outputs": [], "source": [ - "display(list(fta.columns),\n", - "list(tircp.columns))" + "fta = fta[fta[\"bus_count\"] > 0]" ] }, { "cell_type": "code", "execution_count": null, - "id": "7f801d4d-ac40-4c19-9c61-540a922b8805", + "id": "0ad42087-d70b-46f0-8fb0-6c452b7a708b", "metadata": 
{}, "outputs": [], "source": [ - "# add new col to identify source\n", - "fta['source']='fta'\n", - "tircp['source']='tircp_project_tracking'" + "# no more '0' values\n", + "fta.bus_count.unique()" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "1e71a2b7-ccfd-4e3f-a810-fa0227d899d2", - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "id": "dbc87cae-a0b5-41e7-822b-11b370bdb5ce", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "display(fta.head(),tircp.head())" + "## Shorten data frames" ] }, { "cell_type": "markdown", - "id": "b8ca8d50-5041-4459-a764-ccc12ced2037", + "id": "b21e89f6-efcc-48a3-8224-1c9c3926ef68", "metadata": {}, "source": [ - "### Concat both dataframes" + "FTA & TIRCP column names, respectively\n", + "\n", + " 1. agency name (project_sponsor & grant_recipient)\n", + " 2. project title? (project_title)\n", + " 3. project award amount (funding and tircp_award_amount($))\n", + " 4. bus count (bus_count)\n", + " 5. propulsion type (prop_type)\n", + " 6. 
bus size type (bus_size_type)" ] }, { "cell_type": "code", "execution_count": null, - "id": "5c7bfad5-9048-4712-b663-c4795b071737", + "id": "8f61675b-f445-4306-9ce6-27169d6b6909", "metadata": {}, "outputs": [], "source": [ - "concat = pd.concat([fta,tircp], axis=0).reset_index()" + "fta_short = fta[\n", + " [\n", + " \"project_sponsor\",\n", + " \"project_title\",\n", + " \"funding\",\n", + " \"bus_count\",\n", + " \"prop_type\",\n", + " \"bus_size_type\",\n", + " ]\n", + "]" ] }, { "cell_type": "code", "execution_count": null, - "id": "7d5dd236-bdb4-4213-997a-b2c9a4b8c0b6", + "id": "ffd9239b-45c3-4fcf-890e-773f3a38935d", "metadata": {}, "outputs": [], "source": [ - "concat = concat.drop('index', axis=1)" + "len(fta_short) == len(fta)" ] }, { "cell_type": "code", "execution_count": null, - "id": "9c63d955-ccff-4316-aa42-1aa66af7beed", + "id": "b60eb1cb-b535-44f0-8bc5-341928012c59", "metadata": {}, "outputs": [], "source": [ - "concat" + "tircp_short = tircp[\n", + " [\n", + " \"grant_recipient\",\n", + " \"project_title\",\n", + " \"tircp_award_amount_($)\",\n", + " \"bus_count\",\n", + " \"prop_type\",\n", + " \"bus_size_type\",\n", + " ]\n", + "]" ] }, { "cell_type": "code", "execution_count": null, - "id": "4ea75308-6784-4aba-ae2d-b8ceb05ad189", + "id": "af4bacbe-92c5-4dc2-93bf-6afe888bfacd", "metadata": {}, "outputs": [], "source": [ - "list(concat.project_sponsor.sort_values().unique())" + "len(tircp_short) == len(tircp)" ] }, { "cell_type": "markdown", - "id": "14f00383-8c11-4656-a019-a5b58cc4d076", + "id": "cdfd4d75-59d3-482c-97bd-349a11abf105", "metadata": { "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## Export concat data" + "## rename columns to match eachother" ] }, { "cell_type": "code", "execution_count": null, - "id": "173aa1c6-fc73-4476-b3d0-e3bab1c48e4b", + "id": "95c0112f-78cb-4cb5-aa09-d75b124cdbec", "metadata": {}, "outputs": [], "source": [ - 
"concat.to_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_tircp_concat.csv')" - ] - }, - { - "cell_type": "markdown", - "id": "e7582392-0b4b-4c95-a2a6-b0939cf63862", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "## Read in concat data (TIRCP and FTA data)" + "# list of new column names. to be applied in same order as short df\n", + "new_col = [\n", + " \"agency_name\",\n", + " \"project_title\",\n", + " \"project_award_amount\",\n", + " \"bus_count\",\n", + " \"prop_type\",\n", + " \"bus_size_type\",\n", + "]" ] }, { "cell_type": "code", "execution_count": null, - "id": "5b474b47-7617-4eaa-b8fb-c86709c77368", + "id": "90da70ce-0bca-4140-83ed-da95d9c63157", "metadata": {}, "outputs": [], "source": [ - "concat = pd.read_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_tircp_concat.csv')" + "fta_short.columns = new_col\n", + "tircp_short.columns = new_col" ] }, { "cell_type": "code", "execution_count": null, - "id": "89faef72-2c03-46e5-b908-9b6bd86b1210", + "id": "dfaf1766-5a70-4ec2-a54b-0e16f9fca8bc", "metadata": {}, "outputs": [], "source": [ - "display(concat.shape,\n", - " concat.dtypes,\n", - " concat.head(),\n", - " )\n", - " " + "fta_short.columns == tircp_short.columns" ] }, { "cell_type": "code", "execution_count": null, - "id": "71a64926-2ea5-4bbf-aae9-d243c3830ada", + "id": "7f801d4d-ac40-4c19-9c61-540a922b8805", "metadata": {}, "outputs": [], "source": [ - "# add new column for z-score\n", - "concat['zscore_' + 'cost_per_bus'] = zscore(concat['cost_per_bus'])" + "# add new col to identify source\n", + "fta_short[\"source\"] = \"fta_press_release\"\n", + "tircp_short[\"source\"] = \"tircp_project_tracking\"" ] }, { "cell_type": "code", "execution_count": null, - "id": "5111734e-6f0e-4d99-a585-94f80341ca11", + "id": "1e71a2b7-ccfd-4e3f-a810-fa0227d899d2", "metadata": {}, "outputs": [], "source": [ - "concat.shape" + "display(fta_short.head(), tircp_short.head())" ] }, { 
- "cell_type": "code", - "execution_count": null, - "id": "b074a5a1-8479-4366-9dff-5533263b45c1", - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "id": "b8ca8d50-5041-4459-a764-ccc12ced2037", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "# remove outliers\n", - "#filter df for zscores =>-3<=3\n", - "filtered = concat[(concat['zscore_cost_per_bus'] >= -3) & (concat['zscore_cost_per_bus'] <=3)]" + "## Concat both dataframes" ] }, { "cell_type": "code", "execution_count": null, - "id": "8e2309e9-c6f0-4718-a5a9-17991345d243", + "id": "5c7bfad5-9048-4712-b663-c4795b071737", "metadata": {}, "outputs": [], "source": [ - "# 2 rows were dropped\n", - "filtered.shape" + "concat = pd.concat([fta_short, tircp_short], axis=0).reset_index()" ] }, { "cell_type": "code", "execution_count": null, - "id": "8aa8cda1-1caf-45a0-ad06-0bda5d81aefa", + "id": "d6142fce-639e-4082-b83c-6fec6e6a53cf", "metadata": {}, "outputs": [], "source": [ - "filtered.zscore_cost_per_bus.max()" + "display(\n", + " len(fta_short), len(tircp_short), len(fta_short) + len(tircp_short) == len(concat)\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "5d7b379c-a7d6-43d9-9290-4b7b6fe07ef5", + "id": "9c63d955-ccff-4316-aa42-1aa66af7beed", "metadata": {}, "outputs": [], "source": [ - "filtered.sort_values(by='cost_per_bus', ascending=False).head()" - ] - }, - { - "cell_type": "markdown", - "id": "eaf73856-2e5b-48be-9107-a8ca97b22f1f", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "## Stats Analysis" + "concat.sample(3)" ] }, { "cell_type": "code", "execution_count": null, - "id": "49d3cd64-fc0c-4749-a378-30730bbd8fc7", + "id": "7d5dd236-bdb4-4213-997a-b2c9a4b8c0b6", "metadata": {}, "outputs": [], "source": [ - "mean = np.mean(filtered['cost_per_bus'])\n", - "std_dev = np.std(filtered['cost_per_bus'])\n", - "\n", - "#zscore\n", - "mean2 = np.mean(filtered['zscore_cost_per_bus'])\n", - "std_dev2 
= np.std(filtered['zscore_cost_per_bus'])\n" + "concat = concat.drop(\"index\", axis=1)" ] }, { "cell_type": "code", "execution_count": null, - "id": "e42ba34a-2712-438b-a708-1bc149ec9295", + "id": "4ea75308-6784-4aba-ae2d-b8ceb05ad189", "metadata": {}, "outputs": [], "source": [ - "mean2 + (std_dev2*3)" + "# concat looks good\n", + "list(concat.prop_type.sort_values().unique())" ] }, { "cell_type": "markdown", - "id": "88f21124-6b2e-4e3c-b5f2-4d7bfca8b1f5", + "id": "14f00383-8c11-4656-a019-a5b58cc4d076", "metadata": { "tags": [] }, "source": [ - "## Summary" + "## Export concat data" ] }, { "cell_type": "code", "execution_count": null, - "id": "0bd93bab-5b41-43dc-a122-9ef284d18298", - "metadata": { - "jupyter": { - "source_hidden": true - }, - "tags": [] - }, + "id": "173aa1c6-fc73-4476-b3d0-e3bab1c48e4b", + "metadata": {}, "outputs": [], "source": [ - "concat.head()" + "concat.to_csv(\n", + " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_tircp_concat.csv\"\n", + ")" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "fd7906a3-7bf1-4bed-afa3-1a69f21c155f", + "cell_type": "markdown", + "id": "e7582392-0b4b-4c95-a2a6-b0939cf63862", "metadata": { - "jupyter": { - "source_hidden": true - }, "tags": [] }, - "outputs": [], "source": [ - "concat.sort_values(by='cost_per_bus',ascending=True).head()" + "### Read in concat data (TIRCP and FTA data)" ] }, { "cell_type": "code", - "execution_count": 79, - "id": "1cb3ff5d-fa12-4e28-982d-96a0143a2b0f", - "metadata": { - "jupyter": { - "source_hidden": true - }, - "tags": [] - }, + "execution_count": 3, + "id": "5b474b47-7617-4eaa-b8fb-c86709c77368", + "metadata": {}, "outputs": [], "source": [ - "# Variables\n", - "total_unique_projects = len(concat)\n", - "total_bus_count = sum(concat.bus_count)\n", - "total_funding = sum(concat.funding)\n", - "min_bus_cost = concat.cost_per_bus.min()\n", - "max_bus_cost = concat.cost_per_bus.max()\n", - "max_bus_count = concat.bus_count.max()\n", - "\n", - 
"mean = np.mean(filtered['cost_per_bus'])\n", - "std_dev = np.std(filtered['cost_per_bus'])\n", - "\n", - "agency_with_most_bus = concat.loc[concat['bus_count'].idxmax(), 'project_sponsor']\n", - "#how many buses do they have? already answered\n", - "agency_with_highest_funds = concat.loc[concat['funding'].idxmax(), 'project_sponsor']\n", - "#what is the highest amount? already answered\n", - "agency_max_cpb = concat.loc[concat['cost_per_bus'].idxmax(), 'project_sponsor']\n", - "agency_min_cpb = concat.loc[concat['cost_per_bus'].idxmin(), 'project_sponsor']" + "all_bus = pd.read_csv(\n", + " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/fta_tircp_concat.csv\"\n", + ")" ] }, { - "cell_type": "code", - "execution_count": 80, - "id": "17f4e102-c684-4892-8fa0-0f21e5249ef1", + "cell_type": "markdown", + "id": "3e07cd2c-9bc7-4bcc-8e4f-e7d58ebcc1fc", "metadata": { - "jupyter": { - "source_hidden": true - }, "tags": [] }, + "source": [ + "## create cost_per_bus_column\n", + "on all_bus df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b35f7018-64f7-4a5b-ab50-10d871115c83", + "metadata": {}, "outputs": [], "source": [ - "summary = f'''\n", - "As of today, data was scraped from mutltiple sources:\n", - " 1. FTA Bus and Low- and No-Emission Grant Awards press release (federaly funded, nationwide data)\n", - " 2. TIRCP project data (state-funded, California only)\n", - " \n", - "Data from DGS usage reports, Georgia and Washington contracts to be analyzed next.\n", - "\n", - "Note, some projects included additional compoments besides bus purchases (chargers, transit facilities, parts, training) which may cause project costs to increase dramaticly, whereas other projects specified only bus purcahses, and some did not include and bus purchases at all.\n", - "\n", - "Datasets was filtered to only include data that specificed the number of buses to purchase. 
The compiled data was aggregated by agencies and a 'cost per_bus' metric was calculated by dividing the total funding the agency received by the total number of buses they specify.\n", - "\n", - "In total:\n", - " - {total_unique_projects} projects with bus purchases were analyzed.\n", - " - ${total_funding:,.2f} was awarded to agencies for projects including bus purchases.\n", - " - {total_bus_count} total buses are to be purchased.\n", - " - The highest cost per bus for an agency was ${max_bus_cost:,.2f}, belonging too {agency_max_cpb} \n", - " - The lowest cost per bus for an agency was ${min_bus_cost:,.2f}, belonging too {agency_min_cpb}\n", - " \n", - "\n", - "The agency with the most buses specified was {agency_with_most_bus} with {max_bus_count} buses.\n", - "\n", - "After removing outliers, the following was discovered:\n", - " - the mean cost per bus is ${mean:,.2f}.\n", - " - the standard deviation is ${std_dev:,.2f}. \n", - "\n", - "Below are charts that summarize the data.\n", - "'''" + "all_bus[\"cost_per_bus\"] = (\n", + " all_bus[\"project_award_amount\"] / all_bus[\"bus_count\"]\n", + ").astype(\"int64\")" ] }, { "cell_type": "code", - "execution_count": 87, - "id": "808369c9-b70f-4185-9dc1-0bda833f2815", + "execution_count": 5, + "id": "07acb5fc-5611-40d6-bfa5-90b814bd5588", "metadata": {}, "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(133, 9)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Unnamed: 0 int64\n", + "agency_name object\n", + "project_title object\n", + "project_award_amount int64\n", + "bus_count float64\n", + "prop_type object\n", + "bus_size_type object\n", + "source object\n", + "cost_per_bus int64\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ @@ -523,98 +584,662 @@ " \n", " \n", 
" Unnamed: 0\n", - " project_sponsor\n", - " funding\n", + " agency_name\n", + " project_title\n", + " project_award_amount\n", " bus_count\n", - " cost_per_bus\n", + " prop_type\n", + " bus_size_type\n", " source\n", - " zscore_cost_per_bus\n", + " cost_per_bus\n", " \n", " \n", " \n", " \n", - " 0\n", - " 0\n", - " AUTORIDAD METROPOLITANA DE AUTOBUSES (PRMBA)\n", - " 10000000\n", - " 8.0\n", - " 1250000\n", - " fta\n", - " 0.175536\n", - " \n", - " \n", - " 1\n", - " 1\n", - " Alameda-Contra Costa Transit District\n", - " 25513684\n", - " 25.0\n", - " 1020547\n", - " fta\n", - " -0.025151\n", - " \n", - " \n", - " 2\n", - " 2\n", - " Berkshire Regional Transit Authority\n", - " 2212747\n", - " 2.0\n", - " 1106373\n", - " fta\n", - " 0.049915\n", - " \n", - " \n", - " 3\n", - " 3\n", - " Brazos Transit District\n", - " 9650646\n", - " 11.0\n", - " 877331\n", - " fta\n", - " -0.150412\n", + " 96\n", + " 96\n", + " Antelope Valley Transit Authority (AVTA)\n", + " Regional Transit Interconnectivity & Environme...\n", + " 24403000\n", + " 29.0\n", + " electric (not specified)\n", + " conventional (40-ft like)\n", + " tircp_project_tracking\n", + " 841482\n", " \n", " \n", - " 4\n", - " 4\n", - " Cape Fear Public Transportation Authority\n", - " 2860250\n", - " 5.0\n", - " 572050\n", - " fta\n", - " -0.417420\n", + " 119\n", + " 119\n", + " Contra Costa Transportation Authority (CCTA)\n", + " I-680 Express Bus Program\n", + " 14460000\n", + " 6.0\n", + " zero-emission bus (not specified)\n", + " not specified\n", + " tircp_project_tracking\n", + " 2410000\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Unnamed: 0 project_sponsor funding \\\n", - "0 0 AUTORIDAD METROPOLITANA DE AUTOBUSES (PRMBA) 10000000 \n", - "1 1 Alameda-Contra Costa Transit District 25513684 \n", - "2 2 Berkshire Regional Transit Authority 2212747 \n", - "3 3 Brazos Transit District 9650646 \n", - "4 4 Cape Fear Public Transportation Authority 2860250 \n", + " Unnamed: 0 agency_name \\\n", + "96 
96 Antelope Valley Transit Authority (AVTA) \n", + "119 119 Contra Costa Transportation Authority (CCTA) \n", + "\n", + " project_title project_award_amount \\\n", + "96 Regional Transit Interconnectivity & Environme... 24403000 \n", + "119 I-680 Express Bus Program 14460000 \n", "\n", - " bus_count cost_per_bus source zscore_cost_per_bus \n", - "0 8.0 1250000 fta 0.175536 \n", - "1 25.0 1020547 fta -0.025151 \n", - "2 2.0 1106373 fta 0.049915 \n", - "3 11.0 877331 fta -0.150412 \n", - "4 5.0 572050 fta -0.417420 " + " bus_count prop_type bus_size_type \\\n", + "96 29.0 electric (not specified) conventional (40-ft like) \n", + "119 6.0 zero-emission bus (not specified) not specified \n", + "\n", + " source cost_per_bus \n", + "96 tircp_project_tracking 841482 \n", + "119 tircp_project_tracking 2410000 " ] }, - "execution_count": 87, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "concat.head()" + "df_peek(all_bus)" + ] + }, + { + "cell_type": "markdown", + "id": "6ddefcb9-7c31-4a69-bef8-ae80455a2a39", + "metadata": { + "tags": [] + }, + "source": [ + "## Aggregate\n", + "To get total funding and bus count\n", + "- per agency\n", + "- per propulsion type\n", + "- per bus size type" ] }, { "cell_type": "code", - "execution_count": 81, - "id": "619d8c20-0383-470b-b3a5-11ea9f2b5ef7", + "execution_count": 6, + "id": "9eea9c65-dd39-46a2-ba0f-a276e2fc913f", + "metadata": {}, + "outputs": [], + "source": [ + "## function to agg by X col by project title, award and bus count\n", + "\n", + "\n", + "def bus_aggregate(column):\n", + " df_agg = (\n", + " all_bus.groupby(column)\n", + " .agg(\n", + " total_project_count=(\"project_title\", \"count\"),\n", + " total_funds=(\"project_award_amount\", \"sum\"),\n", + " total_bus_count=(\"bus_count\", \"sum\"),\n", + " )\n", + " .reset_index()\n", + " )\n", + " return df_agg" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": 
"f68d9a34-2d60-4366-9f3d-7ead081b8149", + "metadata": {}, + "outputs": [], + "source": [ + "agency_agg = bus_aggregate(\"agency_name\")\n", + "prop_agg = bus_aggregate(\"prop_type\")\n", + "size_agg = bus_aggregate(\"bus_size_type\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "fa04c528-b46a-49a7-98b3-a1857ddc6a9c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(127, 4)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "agency_name object\n", + "total_project_count int64\n", + "total_funds int64\n", + "total_bus_count float64\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agency_nametotal_project_counttotal_fundstotal_bus_count
107Texas Department of Transportation on behalf o...1744376556.0
9Champaign-Urbana Mass Transit District1663539410.0
\n", + "
" + ], + "text/plain": [ + " agency_name total_project_count \\\n", + "107 Texas Department of Transportation on behalf o... 1 \n", + "9 Champaign-Urbana Mass Transit District 1 \n", + "\n", + " total_funds total_bus_count \n", + "107 7443765 56.0 \n", + "9 6635394 10.0 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(11, 4)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "prop_type object\n", + "total_project_count int64\n", + "total_funds int64\n", + "total_bus_count float64\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
prop_typetotal_project_counttotal_fundstotal_bus_count
1CNG15234921904330.0
3electric (not specified)20337174822206.0
\n", + "
" + ], + "text/plain": [ + " prop_type total_project_count total_funds total_bus_count\n", + "1 CNG 15 234921904 330.0\n", + "3 electric (not specified) 20 337174822 206.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(4, 4)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "bus_size_type object\n", + "total_project_count int64\n", + "total_funds int64\n", + "total_bus_count float64\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bus_size_typetotal_project_counttotal_fundstotal_bus_count
1cutaway432861322183.0
2not specified12219446794692418.0
\n", + "
# list and loop to look at all agg dfs at the same time
agg_list = [agency_agg, prop_agg, size_agg]

for agg_df in agg_list:
    # df_peek renders its own output and returns None, so wrapping it in
    # display() only added a stray "None" after every table.
    df_peek(agg_df)

# --- Summary: quick inspection cells ---
all_bus.columns

# propulsion type of the project with the lowest cost per bus
all_bus.loc[all_bus["cost_per_bus"].idxmin(), "prop_type"]

# number of projects per propulsion type (NaN values are not counted)
all_bus["prop_type"].value_counts()
"04af9610-0ab3-43d0-8f30-be75ca4d99ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0agency_nameproject_titleproject_award_amountbus_countprop_typebus_size_typesourcecost_per_buszscore_cost_per_bus
9595Oregon Department of Transportation on behalf ...CET's Low Emission Vanpools and Support Vehicles1812505.0NaNnot specifiedfta_press_release36250-0.887972
\n", + "
## WHY DOES ODOT HAVE A NAN IN PROP TYPE!!
all_bus[
    all_bus["agency_name"]
    == "Oregon Department of Transportation on behalf of Central Oregon Intergovernmental Council"
]

# Variables
# NOTE(review): this cell reads `zscore_bus`, which is assigned further down
# in the notebook (Summary Stats section). On a fresh kernel the z-score cells
# must run first -- consider moving this cell below them.
total_unique_projects = len(all_bus)
# Series.sum() skips NaN; the built-in sum() would turn the total into NaN if
# any row had a missing value.
total_bus_count = all_bus["bus_count"].sum()
total_funding = all_bus["project_award_amount"].sum()
min_bus_cost = all_bus["cost_per_bus"].min()
max_bus_cost = all_bus["cost_per_bus"].max()
max_bus_count = all_bus["bus_count"].max()

cpb_mean = zscore_bus.cost_per_bus.mean()
cpb_std = zscore_bus.cost_per_bus.std()

# agency
agency_with_most_bus = all_bus.loc[all_bus["bus_count"].idxmax(), "agency_name"]

# propulsion type (count once, reuse for every metric)
prop_type_counts = all_bus["prop_type"].value_counts()
prop_type_name_max_freq = prop_type_counts.idxmax()
prop_type_max = prop_type_counts.max()

prop_type_name_min_freq = prop_type_counts.idxmin()
prop_type_min = prop_type_counts.min()

# how many buses do they have? already answered
agency_with_highest_funds = all_bus.loc[
    all_bus["project_award_amount"].idxmax(), "agency_name"
]

# what is the highest amount? already answered
max_cpb_idx = all_bus["cost_per_bus"].idxmax()
min_cpb_idx = all_bus["cost_per_bus"].idxmin()
agency_max_cpb = all_bus.loc[max_cpb_idx, "agency_name"]
agency_min_cpb = all_bus.loc[min_cpb_idx, "agency_name"]

# Raw propulsion-type lookups; the original names are kept so any other cell
# that still reads `test` / `test2` continues to work.
test = all_bus.loc[max_cpb_idx, "prop_type"]
test2 = all_bus.loc[min_cpb_idx, "prop_type"]

# A missing prop_type used to be printed as "nan" in the report; show the
# dataset's existing "not specified" label instead.
prop_type_max_cpb = test if pd.notna(test) else "not specified"
prop_type_min_cpb = test2 if pd.notna(test2) else "not specified"

summary = f"""
As of today, data was scraped from multiple sources:
    1. FTA Bus and Low- and No-Emission Grant Awards press release (federally funded, nationwide data)
    2. TIRCP project data (state-funded, California only)

Data from DGS usage reports, Georgia and Washington contracts to be analyzed next.

Note, some projects included additional components besides bus purchases (chargers, transit facilities, parts, training) which may cause project costs to increase dramatically, whereas other projects specified only bus purchases, and some did not include any bus purchases at all.

Datasets were filtered to only include data that specified the number of buses to purchase. The compiled data was aggregated by agencies and a 'cost_per_bus' metric was calculated by dividing the total funding the agency received by the total number of buses they specify.

In total:
    - {total_unique_projects} projects with bus purchases were analyzed.
    - ${total_funding:,.2f} was awarded to agencies for projects including bus purchases.
    - {total_bus_count:,.0f} total buses are to be purchased.
    - The highest cost per bus for an agency was ${max_bus_cost:,.2f}, belonging to {agency_max_cpb} (propulsion type: {prop_type_max_cpb}).
    - The lowest cost per bus for an agency was ${min_bus_cost:,.2f}, belonging to {agency_min_cpb} (propulsion type: {prop_type_min_cpb}).

The agency with the most buses specified was {agency_with_most_bus} with {max_bus_count:,.0f} buses.

Regarding propulsion types:
    Propulsion type values varied wildly amongst the datasets. Values were validated and grouped as best as possible based on project description or other discrete mentions of propulsion type.
    The following is a summary of propulsion type metrics.
    - The most common propulsion type that was procured was "{prop_type_name_max_freq}" ({prop_type_max} projects).

After removing outliers, the following was discovered:
    - the mean cost per bus is ${cpb_mean:,.2f}.
    - the standard deviation is ${cpb_std:,.2f}.

Below are charts that summarize the data.
"""
# chart function
def make_chart(y_col, title, x_col="agency_name", df=None, top_n=10, agg=None):
    """Draw a bar chart of the largest `y_col` values, labelled by `x_col`.

    Parameters
    ----------
    y_col : str
        Numeric column that sets bar height and sort order.
    title : str
        Chart title.
    x_col : str, default "agency_name"
        Column used for the bar labels.
    df : pandas.DataFrame, optional
        Frame to plot. Defaults to the notebook-level `zscore_bus` frame, so
        existing calls behave exactly as before.
    top_n : int, default 10
        Number of bars to show (largest `y_col` first).
    agg : str or callable, optional
        When given (e.g. "sum" or "mean"), rows are first grouped by `x_col`
        and `y_col` is aggregated with it, giving one bar per category.
        When omitted, each bar is a single row of the frame, so the same
        `x_col` label can appear several times.

    Returns
    -------
    None
        The chart is shown with `plt.show()`.
    """
    data = zscore_bus if df is None else df

    if agg is not None:
        # Collapse to one row per category before ranking.
        data = data.groupby(x_col)[y_col].agg(agg).reset_index()

    top_rows = data.sort_values(by=y_col, ascending=False).head(top_n)
    top_rows.plot(x=x_col, y=y_col, kind="bar", color="skyblue")
    plt.title(title)
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.show()
# NOTE(review): make_chart reads the global `zscore_bus`, which is assigned
# further down in the notebook -- these chart cells presumably fail on a fresh
# Restart & Run All; confirm the intended cell order.

# Highest awarded funds by agency
make_chart("project_award_amount", "Highest Awarded Funds by Transit Agency")

# highest cost per bus by agency
make_chart("cost_per_bus", "Highest cost per bus by Transit Agency")

# Highest bus count
make_chart("bus_count", "Highest Bus Count by Agency")

# number of projects per propulsion type in the outlier-filtered frame
zscore_bus["prop_type"].value_counts()

# AWARD AMOUNT BY PROP TYPE
# why is prop type not consolidated? -> make_chart sorts the individual rows
# of zscore_bus and plots the first ten without grouping, so each bar is one
# project row labelled with its prop_type and a label can repeat.
make_chart("project_award_amount", "award amount by propulsion type", x_col="prop_type")

# BUS COUNT BY PROP TYPE (same caveat: bars are individual rows, not totals)
make_chart("bus_count", "bus count by propulsion type", x_col="prop_type")

# COST PER BUS BY PROP TYPE (same caveat: bars are individual rows, not totals)
make_chart("cost_per_bus", "cost per bus by propulsion type", x_col="prop_type")

# add new column for z-score
# (writes a new column onto all_bus in place)
all_bus["zscore_cost_per_bus"] = zscore(all_bus["cost_per_bus"])
" + "pandas.core.frame.DataFrame" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(133, 10)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Unnamed: 0 int64\n", + "agency_name object\n", + "project_title object\n", + "project_award_amount int64\n", + "bus_count float64\n", + "prop_type object\n", + "bus_size_type object\n", + "source object\n", + "cost_per_bus int64\n", + "zscore_cost_per_bus float64\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0agency_nameproject_titleproject_award_amountbus_countprop_typebus_size_typesourcecost_per_buszscore_cost_per_bus
3333Illinois Department of Transportation on behal...Illinois Department of Transportation Statewid...1229937750.0BEBnot specifiedfta_press_release245987-0.701328
8989The Colorado Department of Transportation (CDO...Mountain Express Low-E Fleet Replacement and E...7531183.0low emission (propane)not specifiedfta_press_release251039-0.696832
\n", + "
df_peek(all_bus)

# remove outliers
# keep only the rows whose cost-per-bus z-score lies in [-3, 3] (both ends inclusive)
cost_zscores = all_bus["zscore_cost_per_bus"]
zscore_bus = all_bus[cost_zscores.between(-3, 3)]
# distribution curve of cost per bus. no outliers
def dist_curve(df, mean, std):
    """Plot the cost-per-bus histogram with mean and standard-deviation guides.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a "cost_per_bus" column to plot.
    mean : float
        Position of the red dashed mean line (also shown in the legend).
    std : float
        Spacing of the green dashed guide lines (also shown in the legend).

    Returns
    -------
    None
        The figure is shown with `plt.show()`.
    """
    dashed = {"linestyle": "dashed", "linewidth": 2}

    sns.histplot(df["cost_per_bus"], kde=True, color="skyblue", bins=20)
    plt.axvline(mean, color="red", label=f"Mean: ${mean:,.2f}", **dashed)

    # Green guides at +1, -1, +2 and +3 standard deviations from the mean.
    # Only the first carries a legend label so the legend lists it once.
    for position, multiple in enumerate((1, -1, 2, 3)):
        legend_entry = (
            {"label": f"Standard Deviation: ${std:,.2f}"} if position == 0 else {}
        )
        plt.axvline(mean + std * multiple, color="green", **dashed, **legend_entry)

    plt.title("Cost Per Bus Distribution with Mean and Standard Deviation")
    plt.xlabel("cost per bus ($ million(s))")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()
" ] @@ -740,35 +1757,20 @@ } ], "source": [ - "# distribution curve of cost per bus. no outliers\n", - "sns.histplot(filtered['cost_per_bus'], kde=True, color='skyblue', bins=20)\n", - "plt.axvline(mean, color='red', linestyle='dashed', linewidth=2, label=f'Mean: ${mean:,.2f}')\n", - "plt.axvline(mean + std_dev, color='green', linestyle='dashed', linewidth=2, label=f'Standard Deviation: ${std_dev:,.2f}')\n", - "plt.axvline(mean - std_dev, color='green', linestyle='dashed', linewidth=2)\n", - "plt.axvline(mean + std_dev*2, color='green', linestyle='dashed', linewidth=2)\n", - "plt.axvline(mean + std_dev*3, color='green', linestyle='dashed', linewidth=2)\n", - "\n", - "plt.title('Cost Per Bus Distribution with Mean and Standard Deviation')\n", - "plt.xlabel('cost per bus ($ million(s))')\n", - "plt.ylabel('Frequency')\n", - "plt.legend()\n", - "plt.show()" + "dist_curve(zscore_bus, cpb_mean, cpb_std)" ] }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 90, "id": "fe7a6649-64d0-4838-8c31-0facd96a5462", "metadata": { - "jupyter": { - "source_hidden": true - }, "tags": [] }, "outputs": [ { "data": { - "image/png": "", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAkUAAAHHCAYAAACx7iyPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAA9hAAAPYQGoP6dpAABrcklEQVR4nO3deXhU1f0G8PfOPpN9XyAECBA2SRCEgiBQ0IBAQS0qigQEtJa4FJdKa2WzP8QWt4KgVgiKKGIRa1EE2RQBlU0WwxZIwpKEJJBlMpn9/P6YZGTIQhJmMpPk/TzPPHXu3OU7N9OZl3PPPUcSQggQERERtXIybxdARERE5AsYioiIiIjAUEREREQEgKGIiIiICABDEREREREAhiIiIiIiAAxFRERERAAYioiIiIgAMBQRERERAWAoIiJqUdq3b48pU6Z4/DhZWVmQJAnp6enOZVOmTIG/v7/Hj11FkiTMnTu3yY5HLR9DEbVYmZmZePTRR9GxY0doNBoEBgbi1ltvxRtvvIGKigq3H89gMGDu3LnYsWNHvdbfsWMHJElyPpRKJTp27IjJkyfjzJkzbq/vWunp6S7HlyQJkZGRGDZsGL766iuPH/9qVT+w13tc/QNcm127dmHUqFFo06YNNBoN2rVrh7Fjx2LNmjWefyNuNnToUOd7l8lkCAwMRGJiIh566CFs2bLFbcf58ssvfTZc+HJt1PIovF0AkSds3LgREyZMgFqtxuTJk9GzZ0+YzWbs2rULzz77LI4dO4Z33nnHrcc0GAyYN28eAMePWX098cQTuOWWW2CxWHDgwAG888472LhxI44cOYLY2Fi31liT+fPno0OHDhBCID8/H+np6bjzzjvxxRdfYMyYMR4/PgBERETggw8+qPE1m82GWbNmQa/Xo3fv3nXuZ926dbjvvvuQnJyMJ598EiEhITh79iy+/fZbvPvuu3jggQc8Ub5HtW3bFgsXLgQAlJeX4/Tp01i/fj1Wr16Ne++9F6tXr4ZSqXSuf+LECchkDfv37pdffomlS5c2KHzEx8ejoqLC5dieUFdtFRUVUCj4M0buw08TtThnz57F/fffj/j4eGzbtg0xMTHO12bOnInTp09j48aNXqzQ1eDBg/H73/8eADB16lR06dIFTzzxBFatWoXZs2ff0L7Ly8vh5+dX5zqjRo1C3759nc+nTZuGqKgofPTRR00Wivz8/DBp0qQaX3vhhRdw+fJlLF68GElJSXXuZ+7cuejevTv27t0LlUrl8tqlS5fcVu/1CCFgNBqh1WpveF9BQUHVzs3LL7+MJ554Am+99Rbat2+PRYsWOV9Tq9U3fMy6WK1W2O12qFQqaDQajx7rerx9fGp5ePmMWpxXXnkFer0e7733nksgqtKpUyc8+eSTzudWqxULFixAQkIC1Go12rdvj7/85S8wmUwu2+3btw8pKSkIDw+HVqtFhw4d8PDDDwNwXP6JiIgAAMybN895yaMxzf6//e1vATjCXZWvvvoKgwcPhp+fHwICAjB69GgcO3bMZbuq/hyZmZm48847ERAQgAcffLDBxw8ODoZWq3X5F3jVpb5rLw3W1K8kLy8PU6dORdu2baFWqxETE4Nx48YhKyurwbVs3boVCxcuxJ133ok//elP110/MzMTt9xyS7VABACRkZEuz+12O9544w3cdNNN0Gg0iIiIwMiRI7Fv3z7nOvX9bLRv3x5jxozB119/jb59+0Kr1eLtt98GABQXF+Opp55CXFwc1Go1OnXqhEWLFsFutzf4fFSRy+V488030b17dyxZsgQlJSUutVzdp8hisWDevHno3LkzNBoNwsLCMGjQIOfltylTpmDp0qUA4HKpEvj17/vPf/4Tr7/+uvM8/PLLLzX+7aucOXMGKSkp8PPzQ2xsLObPnw8hhPP1+n6e6qqtatm1/x87ePAgRo0ahcDAQPj7+2P48OHYu3evyzpVl46///57zJo1CxEREfDz88Ndd92FgoKC6/8BqMV
iSxG1OF988QU6duyIgQMH1mv96dOnY9WqVfj973+Pp59+Gj/88AMWLlyIjIwMfPbZZwAcrQx33HEHIiIi8PzzzyM4OBhZWVlYv349AMfln2XLluGxxx7DXXfdhbvvvhsA0KtXrwbXn5mZCQAICwsDAHzwwQdITU1FSkoKFi1aBIPBgGXLlmHQoEE4ePAg2rdv79zWarUiJSUFgwYNwj//+U/odLrrHq+kpASFhYUQQuDSpUv417/+Bb1eX2vLzfXcc889OHbsGB5//HG0b98ely5dwpYtW5CTk+NS6/Xk5+fjwQcfRHR0NFatWuXyY1ib+Ph4bN26FefPn0fbtm3rXHfatGlIT0/HqFGjMH36dFitVnz33XfYu3evs+WsPp+NKidOnMDEiRPx6KOPYsaMGUhMTITBYMCQIUNw4cIFPProo2jXrh12796N2bNnIzc3F6+//nq9z8e15HI5Jk6ciL/97W/YtWsXRo8eXeN6c+fOxcKFCzF9+nT069cPpaWl2LdvHw4cOIDbb78djz76KC5evIgtW7bUeglz5cqVMBqNeOSRR6BWqxEaGlprqLPZbBg5ciR+85vf4JVXXsGmTZswZ84cWK1WzJ8/v0HvsT61Xe3YsWMYPHgwAgMD8dxzz0GpVOLtt9/G0KFDsXPnTvTv399l/ccffxwhISGYM2cOsrKy8PrrryMtLQ1r165tUJ3UggiiFqSkpEQAEOPGjavX+ocOHRIAxPTp012WP/PMMwKA2LZtmxBCiM8++0wAED/99FOt+yooKBAAxJw5c+p17O3btwsAYsWKFaKgoEBcvHhRbNy4UbRv315IkiR++uknUVZWJoKDg8WMGTNcts3LyxNBQUEuy1NTUwUA8fzzz9fr+CtXrhQAqj3UarVIT0+vsdbt27e7LD979qwAIFauXCmEEOLKlSsCgPjHP/5RrxpqY7PZxO233y5kMlm1Y9blvffeEwCESqUSw4YNE3/729/Ed999J2w2m8t627ZtEwDEE088UW0fdrtdCFH/z4YQQsTHxwsAYtOmTS7rLliwQPj5+YmTJ0+6LH/++eeFXC4XOTk5db6fIUOGiB49etT6etXn8o033nCpJTU11fk8KSlJjB49us7jzJw5U9T0c1D19w0MDBSXLl2q8bWqv70Qv34GH3/8cecyu90uRo8eLVQqlSgoKBBC1P/zVFdtQohq/38bP368UKlUIjMz07ns4sWLIiAgQNx2223OZVWf/REjRjj/3kII8ac//UnI5XJRXFxc4/Go5ePlM2pRSktLAQABAQH1Wv/LL78EAMyaNctl+dNPPw0Azr5HwcHBAID//e9/sFgs7ijV6eGHH0ZERARiY2MxevRolJeXY9WqVejbty+2bNmC4uJiTJw4EYWFhc6HXC5H//79sX379mr7e+yxxxp0/KVLl2LLli3YsmULVq9ejWHDhmH69OnOVrCG0Gq1UKlU2LFjB65cudLg7au8/PLL2LJlC/761782qNP6ww8/jE2bNmHo0KHYtWsXFixYgMGDB6Nz587YvXu3c73//Oc/kCQJc+bMqbaPqhap+n42qnTo0AEpKSkuy9atW4fBgwcjJCTE5e83YsQI2Gw2fPvtt/V+bzWpuv29rKys1nWCg4Nx7NgxnDp1qtHHueeee5yXh+sjLS3N+d+SJCEtLQ1msxnffPNNo2u4HpvNhs2bN2P8+PHo2LGjc3lMTAweeOAB7Nq1y/n9UOWRRx5xaYEcPHgwbDYbsrOzPVYn+TZePqMWJTAwEEDdPxJXy87OhkwmQ6dOnVyWR0dHIzg42PnlOGTIENxzzz2YN28eXnvtNQwdOhTjx4/HAw88cMMdW1988UUMHjwYcrkc4eHh6Natm7M/T9UPWVU/o2tVvd8qCoXiupeNrtWvXz+XjtYTJ05E7969kZaWhjFjxtTYP6c2arUaixYtwtNPP42oqCj85je/wZgxYzB58mRER0fXax/ff/895syZg8GDB9cYWsxmMy5
fvuyyLCIiAnK5HACQkpKClJQUGAwG7N+/H2vXrsXy5csxZswYHD9+HJGRkcjMzERsbCxCQ0NrraO+n40qHTp0qLaPU6dO4fDhw7UGihvt/K3X6wHU/Y+A+fPnY9y4cejSpQt69uyJkSNH4qGHHmrQpd2a3lttZDKZSygBgC5dugBAo/qV1VdBQQEMBgMSExOrvdatWzfY7XacO3cOPXr0cC5v166dy3ohISEAcEOBnpo3hiJqUQIDAxEbG4ujR482aLvr9VeRJAmffvop9u7diy+++AJff/01Hn74YSxevBh79+69oQHrbrrpJowYMaLG16r6bXzwwQc1hoprb0dWq9UNvh37WjKZDMOGDcMbb7yBU6dOoUePHrWeH5vNVm3ZU089hbFjx2LDhg34+uuv8be//Q0LFy7Etm3brntL/eXLlzFx4kQEBgZizZo1zqBztd27d2PYsGEuy86ePVutv5JOp8PgwYMxePBghIeHY968efjqq6+Qmpp6nTPgqj59mQDUeKeZ3W7H7bffjueee67GbarCQmNVfc6vDW5Xu+2225CZmYnPP/8cmzdvxr///W+89tprWL58OaZPn16v47jjLrqrNeTz5Ek1fb4AuHQKp9aFoYhanDFjxuCdd97Bnj17MGDAgDrXjY+Ph91ux6lTp9CtWzfn8vz8fBQXFyM+Pt5l/d/85jf4zW9+g7///e9Ys2YNHnzwQXz88ceYPn16vX88GyIhIQGA486p2oKTJ1itVgC/tkRU/Qu6uLjYZb3aLjMkJCTg6aefxtNPP41Tp04hOTkZixcvxurVq+s87pQpU3Du3Dl8/vnntbZ4JSUlVRu48HqtUFUtYbm5uc76vv76a1y+fLnW1qKGfjZqkpCQAL1e75G/nc1mw5o1a6DT6TBo0KA61w0NDcXUqVMxdepU6PV63HbbbZg7d64zFLnzs2u323HmzBmXwHfy5EkAcAbXhnye6ltbREQEdDodTpw4Ue2148ePQyaTIS4url77otaLfYqoxXnuuefg5+eH6dOnIz8/v9rrmZmZeOONNwAAd955JwBUuwvo1VdfBQDnHT1Xrlyp9q/H5ORkAHDenl11p9e1X/Q3IiUlBYGBgfi///u/GvsyeeL2YYvFgs2bN0OlUjnDQHx8PORyebU+MG+99ZbLc4PBAKPR6LIsISEBAQEB1W5jv9brr7+OL774Ao8//jh+97vf1bpeSEgIRowY4fKoGq9m69atNW5T1T+o6tLKPffcAyGEc7DNq1X9nev72ajLvffeiz179uDrr7+u9lpxcbEzfDaUzWbDE088gYyMDDzxxBPVLqNeraioyOW5v78/OnXq5PL3qBrLyl2f3SVLljj/WwiBJUuWQKlUYvjw4QDq/3lqSG1yuRx33HEHPv/8c5fLdPn5+VizZg0GDRpU53kiAthSRC1QQkIC1qxZg/vuuw/dunVzGdF69+7dWLdunXMcl6SkJKSmpuKdd95BcXExhgwZgh9//BGrVq3C+PHjnZdpVq1ahbfeegt33XUXEhISUFZWhnfffReBgYHOH0+tVovu3btj7dq16NKlC0JDQ9GzZ0/07Nmz0e8lMDAQy5Ytw0MPPYSbb74Z999/PyIiIpCTk4ONGzfi1ltvdfkBaoyvvvoKx48fB+Do47JmzRqcOnUKzz//vPNHJCgoCBMmTMC//vUvSJKEhIQE/O9//6vWJ+bkyZMYPnw47r33XnTv3h0KhQKfffYZ8vPzcf/999daw+HDh/HnP/8Z/v7+SEpKqrVFqVevXnX2hRk3bhw6dOiAsWPHIiEhAeXl5fjmm2/wxRdf4JZbbsHYsWMBAMOGDcNDDz2EN998E6dOncLIkSNht9vx3XffYdiwYUhLS6v3Z6Muzz77LP773/9izJgxmDJlCvr06YPy8nIcOXIEn376KbKyshAeHl7nPkpKSpznw2AwOEe0zszMxP33348FCxbUuX337t0xdOhQ9OnTB6Ghodi3bx8+/fRTl87
Qffr0AeAYXT0lJQVyubzOv1ddNBoNNm3ahNTUVPTv3x9fffUVNm7ciL/85S/OvlX1/Tw1tLaXXnoJW7ZswaBBg/DHP/4RCoUCb7/9NkwmE1555ZVGvR9qZbx67xuRB508eVLMmDFDtG/fXqhUKhEQECBuvfVW8a9//UsYjUbnehaLRcybN0906NBBKJVKERcXJ2bPnu2yzoEDB8TEiRNFu3bthFqtFpGRkWLMmDFi3759LsfcvXu36NOnj1CpVNe9Pb/qtuR169Zd971s375dpKSkiKCgIKHRaERCQoKYMmWKy/FTU1OFn59fvc9PTbfkazQakZycLJYtW+Zyq7IQjiEH7rnnHqHT6URISIh49NFHxdGjR11uoS4sLBQzZ84UXbt2FX5+fiIoKEj0799ffPLJJw2upabH9YY7+Oijj8T9998vEhIShFarFRqNRnTv3l389a9/FaWlpS7rWq1W8Y9//EN07dpVqFQqERERIUaNGiX279/vXKc+nw0hHLfB13bbe1lZmZg9e7bo1KmTUKlUIjw8XAwcOFD885//FGazuc73M2TIEJf37+/vLzp37iwmTZokNm/eXOM2196S/9JLL4l+/fqJ4OBgodVqRdeuXcXf//53l2NbrVbx+OOPi4iICCFJkvMW+Kpb5GsaYqG2W/L9/PxEZmamuOOOO4ROpxNRUVFizpw51YZFqM/nqa7ahKh+S74Qjv+vpqSkCH9/f6HT6cSwYcPE7t27Xdap+rxdO8RGbUMFUOshCcEeZURERETsU0REREQEhiIiIiIiAAxFRERERAAYioiIiIgAMBQRERERAWAoIiIiIgLAwRtrZLfbcfHiRQQEBHhk6gYiIiJyPyEEysrKEBsb26h5IBmKanDx4kXOkUNERNRMnTt3rtb5E+vCUFSDgIAAAI6TyrlyiIiImofS0lLExcU5f8cbiqGoBlWXzAIDAxmKiIiImpnGdn1hR2siIiIiMBQRERERAWAoIiIiIgLAUEREREQEgKGIiIiICABDEREREREAhiIiIiIiAAxFRERERAAYioiIiIgAMBQRERERAWAoIiIiIgLAUEREREQEgKGIiIiICABDEREREREAQOHtAlqjnJwcFBYWun2/4eHhaNeundv3S0RE1BowFDWxnJwcdOvWDQaDwe371ul0yMjIYDAiIiJqBIaiJlZYWAiDwYAXlryH+E6Jbttv9ukTeCltGgoLCxmKiIiIGoGhyEviOyUisVeyt8sgIiKiSuxoTURERASGIiIiIiIADEVEREREABiKiIiIiAAwFBEREREBYCgiIiIiAsBQRERERASAoYiIiIgIAEMREREREQCGIiIiIiIADEVEREREABiKiIiIiAAwFBEREREBYCgiIiIiAsBQRERERASAoYiIiIgIAEMREREREQCGIiIiIiIADEVEREREABiKiIiIiAB4ORR9++23GDt2LGJjYyFJEjZs2ODyuiRJNT7+8Y9/1LrPuXPnVlu/a9euHn4nRERE1Nx5NRSVl5cjKSkJS5curfH13Nxcl8eKFSsgSRLuueeeOvfbo0cPl+127drlifKJiIioBVF48+CjRo3CqFGjan09Ojra5fnnn3+OYcOGoWPHjnXuV6FQVNuWiIiIqC7Npk9Rfn4+Nm7ciGnTpl133VOnTiE2NhYdO3bEgw8+iJycnCaokIiIiJozr7YUNcSqVasQEBCAu+++u871+vfvj/T0dCQmJiI3Nxfz5s3D4MGDcfToUQQEBNS4jclkgslkcj4vLS11a+1ERETk+5pNKFqxYgUefPBBaDSaOte7+nJcr1690L9/f8THx+OTTz6ptZVp4cKFmDdvnlvrJSIioualWVw+++6773DixAlMnz69wdsGBwejS5cuOH36dK3rzJ49GyUlJc7HuXPnbqRcIiIiaoaaRSh677330KdPHyQlJTV4W71ej8zMTMTExNS6jlqtRmBgoMuDiIiIWhevhiK9Xo9Dhw7h0KFDAICzZ8/
i0KFDLh2jS0tLsW7dulpbiYYPH44lS5Y4nz/zzDPYuXMnsrKysHv3btx1112Qy+WYOHGiR98LERERNW9e7VO0b98+DBs2zPl81qxZAIDU1FSkp6cDAD7++GMIIWoNNZmZmSgsLHQ+P3/+PCZOnIiioiJERERg0KBB2Lt3LyIiIjz3RoiIiKjZ82ooGjp0KIQQda7zyCOP4JFHHqn19aysLJfnH3/8sTtKIyIiolamWfQpIiIiIvI0hiIiIiIiMBQRERERAWAoIiIiIgLAUEREREQEgKGIiIiICABDEREREREAhiIiIiIiAAxFRERERAAYioiIiIgAMBQRERERAWAoIiIiIgLAUEREREQEgKGIiIiICABDEREREREAhiIiIiIiAAxFRERERAAYioiIiIgAMBQRERERAWAoIiIiIgIAKLxdALlXRkaGR/YbHh6Odu3aeWTfnpKTk4PCwkK377c5ngsiIro+hqIWouhSHiBJmDRpkkf2r9PpkJGR0WzCQE5ODrp16waDweD2fTe3c0FERPXDUNRC6EtKACGQtmAxkm7p79Z9Z58+gZfSpqGwsLDZBIHCwkIYDAa8sOQ9xHdKdNt+m+O5ICKi+mEoamHadEhAYq9kb5fhM+I7JfJ8EBFRvbCjNREREREYioiIiIgAMBQRERERAWAoIiIiIgLAUEREREQEgKGIiIiICABDEREREREAhiIiIiIiAAxFRERERAAYioiIiIgAMBQRERERAWAoIiIiIgLg5VD07bffYuzYsYiNjYUkSdiwYYPL61OmTIEkSS6PkSNHXne/S5cuRfv27aHRaNC/f3/8+OOPHnoHRERE1FJ4NRSVl5cjKSkJS5curXWdkSNHIjc31/n46KOP6tzn2rVrMWvWLMyZMwcHDhxAUlISUlJScOnSJXeXT0RERC2IwpsHHzVqFEaNGlXnOmq1GtHR0fXe56uvvooZM2Zg6tSpAIDly5dj48aNWLFiBZ5//vkbqpeIiIhaLq+GovrYsWMHIiMjERISgt/+9rd46aWXEBYWVuO6ZrMZ+/fvx+zZs53LZDIZRowYgT179tR6DJPJBJPJ5HxeWlrqvjdAdcrJyUFhYaHb95uRkeH2fRIRUcvm06Fo5MiRuPvuu9GhQwdkZmbiL3/5C0aNGoU9e/ZALpdXW7+wsBA2mw1RUVEuy6OionD8+PFaj7Nw4ULMmzfP7fVT3XJyctCtWzcYDAaPHUOv13ts30RE1LL4dCi6//77nf990003oVevXkhISMCOHTswfPhwtx1n9uzZmDVrlvN5aWkp4uLi3LZ/qllhYSEMBgNeWPIe4jslunXfe7dvxnuL5sNoNLp1v0RE1HL5dCi6VseOHREeHo7Tp0/XGIrCw8Mhl8uRn5/vsjw/P7/OfklqtRpqtdrt9VL9xHdKRGKvZLfuM/vUCbfuj4iIWr5mNU7R+fPnUVRUhJiYmBpfV6lU6NOnD7Zu3epcZrfbsXXrVgwYMKCpyiQiIqJmyKstRXq9HqdPn3Y+P3v2LA4dOoTQ0FCEhoZi3rx5uOeeexAdHY3MzEw899xz6NSpE1JSUpzbDB8+HHfddRfS0tIAALNmzUJqair69u2Lfv364fXXX0d5ebnzbjTyfUIIWAVgtgnYhIDVDgg4HhIAuQTIZRJUMkApkyCTJC9XTERELYFXQ9G+ffswbNgw5/Oqfj2pqalYtmwZDh8+jFWrVqG4uBixsbG44447sGDBApdLXZmZmS53L913330oKCjAiy++iLy8PCQnJ2PTpk3VOl+Tb1CoNbAotTivt8BgFTBY7aiwCdhF/fehkknQKiToFBL8lTIEKJtVAygREfkIr4aioUOHQojaf/2+/vrr6+4jKyur2rK0tDRnyxH5nstGG06VmHAIkZjzbSZKlCqU6K3V1lNIgEImQS4BMkmCBMAOR2Cy2gGLXUAAMNsFzGaBEjMA2AAA2r63494FS4HAMFjtAgoZW5OIiKhuzaqjNTVfFVY7jl4
24ehlI/IrHMEFkgYKJSCzWRGiU8FfKYNOIYNWIUEllyC/zmUxIQQsdsBos6PCKlButUNvEdBb7JCpteg9+l4AwI+XjAhRyxChVSBULePlNiIiqhFDEXlUodGKH/IrkHHFBGtlo6AEoH2AEurSfEwfPwoL330fXdskN3jfkiRBJQdUcjkCVb8utwmB7Vu34cd9+zBk4nQItQ6XTXZcNpmhlAFRWgWidXKo5bzMRkREv2IoIo8oqLDiu1wDTjquaQEAIrVyJIdp0DVYDZ1ShgMHMlF07gzc3W4jlyTYSwqx6c0FGNKvD3oP/i0uGW0oqLDCbAfOl1txodyKCK0cbfwU0CkYjoiIiKGI3ExvseO73HIcLjKhqrdYlyAV+kdpEatTQPLCpSudUob2Shni/RW4bLIj12BFidmOSxU2XKqwIVIrRzt/JdRyXlYjImrNGIrILYQQOFhoxM6LBpgqbx3rEqTC4BgdIrS+8TGTJAlhGjnCNHKUme04X27BZZMjHBVW2BDrp0Bbf8V1+zIREVHL5Bu/VtSsFZts+F92Gc6XO+4gi9EpMLyNH9r6K71cWe0CVDJ0U6lRZrYjq8yCUosd58utKDDakBCoRIi6+tx6RETUsjEU0Q05etmIzefKYbYLqGQShsTq0Dtc02zu8ApQydAzVIUikx1nSy0w2QR+uWJGpFaODgFK3spPRNSKMBRRo1jtAt+cL8ehIseEq239FBgTH4DgZtjCIkkSwjVyhKhkyNFbcNHg6GtUbLKjS7ASQarm956IiKjhGIqowfQWOz47W4oLlZfLBkXrMDBa22xah2ojl0noEKhCmMaGUyUWGG0CRy+b0c5fgbZ+3ukkTkRETYehiBqkyGjF2sxSlJrtUMsljGsfgI5XDxLUAgSq5EgKk+FMqQUFRhty9FaUWezoEtSy3icREbliKKJ6K4EKq0+WoMImEKKWYULHIIRqWualJYVMQpdgFYIMVpwpteCKyY7DRSZo5I7O4xkZGR45rslkcpnbz13Cw8PRrl07t++XiKglYSiiemnX6xbsQyRsNoEYnQITOgZC1womXo3SKeCnlCHjihkVNoGKoDi0S+qHSZMmeeR4kiTVOR9gY+l0OmRkZDAYERHVgaGIrsui0ODhpZ/AJskQ76/EPR0DoWpFAx36K2VIClMjo9gEPZSYvuxTlB7di8R2sW49zt7tm/HeovlIW7AYSbf0d9t+s0+fwEtp01BYWMhQRERUB4YiqpPeYkdpcBzUMhlChRG/TwiDshXepq6SS+gZosau41lQhkYjrM9QhASrEOnGgSmzT50AALTpkIDEXslu2y8REdVPy7/+QY1WYbXjlysmCJkMmT99h94oaJWBqIpcJsGU8RN+2vAhIEk4VWJBrsHq7bKIiMhNGIqoRubKQQwtdkBuMeKDWZMhh/v7ujQ/Ap8t+BNQdBEAcKbUgjwGIyKiFoGhiKqxC4HjxWYYbQIauYSg4vMwleu9XZbPEEIA+WcRq3NcOsssteBSBYMREVFzx1BELoQQOFNqQZnFDrkEdAtRQSZs3i7LJ7UPUCBG5xiS4FSJBYVGniciouaMoYhc5FXYkF/h+HFPDFZBp+BHpDaSJKFDgBJRWkcwOllsRomJwYiIqLniLx456S2OSVEBID5AwZni60GSJCQEKhGmlkEAyCg2o9xi93ZZRETUCAxFBMAxweuJYjMEgFC1DG10HK2hviTJMfp1oFIGmwB+uWKG2cZO6UREzQ1DEUEIgcxSxwSoapmEzkEqTn7aQDJJQrcQFbRyCWa7QEaxCXYPjExNRESew1BEKDTaUGi0QYKjH5GiFY9FdCMUMkcwUkiA3iJwqsTikSk7iIjIMxiKWjmzzXG3GQC09VcgQMWPxI3QKmRIDFZBgiNs5hrY8ZqIqLngL2ArJoTA6VIzrALwU0ho68d+RO4QrJajfYASAHC2zIISM4MREVFzwFDUihUYbbhiskMC0CVIBRn7EblNjE6OcI3j7r0Txex
4TUTUHDAUtVIWu3Defh/nr4BOyY+CO0mShE6BSugUEix24GSJmf2LiIh8HH8JW6msMgusAtApJLThZTOPkMskJAarIJOAErMd58s5FQgRkS9jKGqFSs02XKoctTohUMnLZh6kU8jQsbJ/UY7eilL2LyIi8lkMRa1M1dxmABCllSNQxVGrPS1SK0eE5tc50my8jEZE5JMYilqZ/Aobyq0CcgmIr2zBIM+SJAkdA5VQySQYbQLZZRZvl0RERDVgKGpFrHaBHH1V52ollByksckoZBI6BTlCaK7Bxtv0iYh8EENRK3K+3AqLHdDKJcToeNmsqYWo5YjSXnUZzc7LaEREvoShqJUw2ey4WHn3U/sAdq72lvYBSqhlEkw2gSxeRiMi8ikMRa1Ejt4KASBQKUOImn92b7n6MlpehQ3FJl5GIyLyFV79dfz2228xduxYxMbGQpIkbNiwwfmaxWLBn//8Z9x0003w8/NDbGwsJk+ejIsXL9a5z7lz50KSJJdH165dPfxOfJvBanfegt8+QAmJrUReFayWI7ry8uVpXkYjIvIZXg1F5eXlSEpKwtKlS6u9ZjAYcODAAfztb3/DgQMHsH79epw4cQK/+93vrrvfHj16IDc31/nYtWuXJ8pvNqrudgpVyzjhq49o76+EWi7BZBc4x0EdiYh8gleHMh41ahRGjRpV42tBQUHYsmWLy7IlS5agX79+yMnJQbt27Wrdr0KhQHR0tFtrba70Fjsum+wAgHh/3oLvK+QyCR0DlMgoNuNiuRWRWnZ8JyLytmbVbFBSUgJJkhAcHFzneqdOnUJsbCw6duyIBx98EDk5OXWubzKZUFpa6vJoKc5V3oIfrpFzfjMfE6qRI0QtgwCcA2oSEZH3NJtfSaPRiD//+c+YOHEiAgMDa12vf//+SE9Px6ZNm7Bs2TKcPXsWgwcPRllZWa3bLFy4EEFBQc5HXFycJ95Ck7u6lSjOn/Ob+aKOAUrI4JgbTR7extvlEBG1as0iFFksFtx7770QQmDZsmV1rjtq1ChMmDABvXr1QkpKCr788ksUFxfjk08+qXWb2bNno6SkxPk4d+6cu9+CV7i0EimaxZ+61dEoZGhbGVhVHXpA7efv5YqIiFovn28+qApE2dnZ2LZtW52tRDUJDg5Gly5dcPr06VrXUavVUKvVN1qqTylnK1Gz0cZPgUsVNhhVGoz4w5+9XQ4RUavl080HVYHo1KlT+OabbxAWFtbgfej1emRmZiImJsYDFfquC5V3NIWpZWwl8nGyyrnRAGDAfdMBtc7LFRERtU5e/bXU6/U4dOgQDh06BAA4e/YsDh06hJycHFgsFvz+97/Hvn378OGHH8JmsyEvLw95eXkwm83OfQwfPhxLlixxPn/mmWewc+dOZGVlYffu3bjrrrsgl8sxceLEpn57XmO02VFgdIxL1JZ3nDULIWo5rIUXIVcogOgOEIJjFxERNTWvXlfZt28fhg0b5nw+a9YsAEBqairmzp2L//73vwCA5ORkl+22b9+OoUOHAgAyMzNRWFjofO38+fOYOHEiioqKEBERgUGDBmHv3r2IiIjw7JvxIVXTeQSpZPDnHWfNhjnrGERAKJR+wbhitiNUzdv0iYiakldD0dChQ+v8F3F9/rWclZXl8vzjjz++0bKaN7kC+ZWjV7fxY1+i5kSYKrD743cxJPVxZJVZEKKScfRxIqImxGaEliYkGnYB+CkkBHP06mZnx3uvA1YLKqzCGW6JiKhpsCmhBZErlECIo0N5rJ/C7a0MGRkZPr2/lsCoLwUKzwHRHZGjtyBCI4dcxtYiIqKmwFDUgtx0++8ApQpKmWNsIncpupQHSBImTZrktn1eTa/Xe2S/zdblPGjaJMBoE7hgsKIdO8sTETUJhqIWZODERwAAMToFZG5sJdKXlABCIG3BYiTd0t9t+927fTPeWzQfRqPRbftsGQTiA5Q4UWzGhXIrorUKqORsLSIi8jSGohZCFhCCuJ43A3Y7onW
e+bO26ZCAxF7Jbttf9qkTbttXSxOmliFAKaHMIpCjt6BTkMrbJRERtXjsidtCKGM6Ov6jpABK9kFp9iRJQvsAx2Wz/AobDFa7lysiImr5GIpaALNNQB5WOWL35YveLYbcJlAlR6ja8X/Rc3qrl6shImr5GhWKzpw54+466AbkV1ghyWTIOvQjYDJ4uxxyo7jKTtaFRhsMFrYWERF5UqNCUadOnTBs2DCsXr2anWS9TAiBPINjPJsfPl3p5WrI3fyVMoRVthbl6C1eroaIqGVrVCg6cOAAevXqhVmzZiE6OhqPPvoofvzxR3fXRvVw2WSH2S4gLCYc/eYLb5dDHlDVWlRksqOcrUVERB7TqFCUnJyMN954AxcvXsSKFSuQm5uLQYMGoWfPnnj11VdRUFDg7jqpFnkGR18TS/45WM0mL1dDnuCnlDnHnWJrERGR59xQR2uFQoG7774b69atw6JFi3D69Gk888wziIuLw+TJk5Gbm+uuOqkGRqsdxWZHy4E1P8u7xZBHxfk7hlm4bLJDz9YiIiKPuKFQtG/fPvzxj39ETEwMXn31VTzzzDPIzMzEli1bcPHiRYwbN85ddVINLhkdfYmCVDIIIztYt2Q6hQwRbC0iIvKoRo3y9+qrr2LlypU4ceIE7rzzTrz//vu48847IZM5MlaHDh2Qnp6O9u3bu7NWuooQApcqJwyN1MrBG/Fbvjh/BQqMNlwx2VFmtiOAE/4SEblVo0LRsmXL8PDDD2PKlCmIiYmpcZ3IyEi89957N1Qc1a7EbIfJJiCXgDA3znNGvkurkCFSK8elChty9Bb0CFV7uyQiohalUaHo1KlT111HpVIhNTW1MbuneqhqJQrXyCF34zxn5Nvi/BS4VGFDsdnRt8hfydYiIiJ3adQ36sqVK7Fu3bpqy9etW4dVq1bdcFFUN6tdoKiyP1GUltPXtSaaq/oWnS/nKNdERO7UqFC0cOFChIeHV1seGRmJ//u//7vhoqhuhUYb7AC0cgn+SrYStTZt/BxBuMjIOdGIiNypUaEoJycHHTp0qLY8Pj4eOTk5N1wU1e3qDtYSL521On5KmXNOtAtsLSIicptGhaLIyEgcPny42vKff/4ZYWFhN1wU1c5gtaOscpyaSF46a7Xa+jlGuS6osMFoY2sREZE7NCoUTZw4EU888QS2b98Om80Gm82Gbdu24cknn8T999/v7hrpKlWtRCFqGVRythK1VgEqmWN8KgAX2VpEROQWjWpqWLBgAbKysjB8+HAoFI5d2O12TJ48mX2KPEgIgYIKxw8gW4morZ8CJWYz8g02tPUTDMlERDeoUb+sKpUKa9euxYIFC/Dzzz9Dq9XipptuQnx8vLvro6tcMdthtgMKCc4+JdR6Balk8FdK0FsELhqsaB+g9HZJRETN2g01N3Tp0gVdunRxVy10HQWVl84itHLI2MG61ZMkCW39lDhebEaewYq2fgooZPxcEBE1VqNCkc1mQ3p6OrZu3YpLly7Bbnft6Llt2za3FEe/stkFLhurQhEvnZFDqFoGnUKCwSqQV2F1dsAmIqKGa9Sv65NPPon09HSMHj0aPXv25G3hTeCyyTE2kUYuwV/B800OkiQhVqfA6VILcsutiNUp2IpIRNRIjQpFH3/8MT755BPceeed7q6HalFg/HVaD4ZQulqEVo5svQVmu2NgT3bCJyJqnEb11lWpVOjUqZO7a6FaWOwCxSbHJcoITv5K15BVthYBjsEchRBeroiIqHlqVCh6+umn8cYbb/DLt4kUGW0QAHQKCTpOAEo1iNIpIJMAg1WgxMzBHImIGqNR7ey7du3C9u3b8dVXX6FHjx5QKl07d65fv94txZFDYVUHa7YSUS2UMglRWjlyDTZcKLciWM3PChFRQzUqFAUHB+Ouu+5ydy1UA7Pt13/5hzMUUR1idQrkGmwoNttRbrHDj62KREQN0qhQtHLlSnfXQbWoaiXyV0rQKPgjR7XTKGQI08hRZLThYrkVnYN
V3i6JiKhZafSvrNVqxTfffIO3334bZWVlAICLFy9Cr9e7rTgCCo2OaT0iNLyjiK6vTWWH6wKjDSYb+/wRETVEo35ps7OzMXLkSOTk5MBkMuH2229HQEAAFi1aBJPJhOXLl7u7zlbJaLWjzOL4YeOlM6qPAJUMgUoZSi125HLqDyKiBmlUS9GTTz6Jvn374sqVK9Bqtc7ld911F7Zu3eq24lq7qktnQSoZJ/ukeov1c/xbJ99ghY13iBIR1VujQtF3332HF154ASqVa5+F9u3b48KFC/Xez7fffouxY8ciNjYWkiRhw4YNLq8LIfDiiy8iJiYGWq0WI0aMwKlTp66736VLl6J9+/bQaDTo378/fvzxx3rX5EuuHrCRqL5C1TKo5RKs4tf58oiI6PoaFYrsdjtstupftufPn0dAQEC991NeXo6kpCQsXbq0xtdfeeUVvPnmm1i+fDl++OEH+Pn5ISUlBUajsdZ9rl27FrNmzcKcOXNw4MABJCUlISUlBZcuXap3Xb7AYLXDYBWQAIQxFFEDSJKEGJ3jM5NrsIJtRURE9dOoUHTHHXfg9ddfdz6XJAl6vR5z5sxp0NQfo0aNwksvvVTj7f1CCLz++ut44YUXMG7cOPTq1Qvvv/8+Ll68WK1F6WqvvvoqZsyYgalTp6J79+5Yvnw5dDodVqxY0ZC36HVFla1EwSoZlJz5nBooSvvrYI4Wpc7b5RARNQuNCkWLFy/G999/j+7du8NoNOKBBx5wXjpbtGiRWwo7e/Ys8vLyMGLECOeyoKAg9O/fH3v27KlxG7PZjP3797tsI5PJMGLEiFq3AQCTyYTS0lKXh7dVhSK2ElFjKGQSIrWOz45RF+zdYoiImolG3X3Wtm1b/Pzzz/j4449x+PBh6PV6TJs2DQ8++KBLx+sbkZeXBwCIiopyWR4VFeV87VqFhYWw2Ww1bnP8+PFaj7Vw4ULMmzfvBit2H6PVjnKr46JHKEMRNVKMToE8gw1mlT9C2sR7uxwiIp/X6MFvFAoFJk2a5M5avGb27NmYNWuW83lpaSni4uK8Vk+h6de7znjpjBpLp5AhWCVDsdmOgfdN83Y5REQ+r1Gh6P3336/z9cmTJzeqmKtFR0cDAPLz8xETE+Ncnp+fj+Tk5Bq3CQ8Ph1wuR35+vsvy/Px85/5qolaroVarb7hmd7nMS2fkJjF+ChSbzeg77kFYUeLtcoiIfFqjQtGTTz7p8txiscBgMEClUkGn07klFHXo0AHR0dHYunWrMwSVlpbihx9+wGOPPVbjNiqVCn369MHWrVsxfvx4AI475bZu3Yq0tLQbrqkpmGzCOWBjGCf1pBsUopJBZjVDExCIi8Lq7XKIiHxaozpaX7lyxeWh1+tx4sQJDBo0CB999FG996PX63Ho0CEcOnQIgKNz9aFDh5CTkwNJkvDUU0/hpZdewn//+18cOXIEkydPRmxsrDPwAMDw4cOxZMkS5/NZs2bh3XffxapVq5CRkYHHHnsM5eXlmDp1amPeapOr6mAdoOSAjXTjJEmCtuIKACAHARAczJGIqFZum1Crc+fOePnllzFp0qQ6OzVfbd++fRg2bJjzeVW/ntTUVKSnp+O5555DeXk5HnnkERQXF2PQoEHYtGkTNBqNc5vMzEwUFhY6n993330oKCjAiy++iLy8PCQnJ2PTpk3VOl/7Kt51Ru6mNpagSNIB/gE4W2ZBx0BOFEtEVBO3zjKqUChw8eLFeq8/dOjQOv/lKkkS5s+fj/nz59e6TlZWVrVlaWlpzeZy2dXMNoFSix0AEKZp9Fy9RC5kQmD/f9fg1gcexb6CCoYiIqJaNCoU/fe//3V5LoRAbm4ulixZgltvvdUthbVGlyvvOvNXSNDIGYrIffasfQ+3TnwEZ0otKDJaEaZx67+HiIhahEZ9M17dpwdwtOhERETgt7/9LRYvXuyOulolXjojTyk6dxbhMKIQWuwvMOKOOH9vl0RE5HMaFYrsdru762j1rHaBEnP
VpTOGInK/eJSiEFocvWzCbbE6tkYSEV2D34o+4rLJBgFAp5CgVfDPQu4XChPCNXKY7QJHikzeLoeIyOc0qqXo6tGfr+fVV19tzCFaHV46I0+TAPSJ0ODrc+U4UFiBvhEaSBKHfSAiqtKoUHTw4EEcPHgQFosFiYmJAICTJ09CLpfj5ptvdq7HL9z6sQmBYlPlpTMO2Ege1CNEgx0XDLhisvP2fCKiazQqFI0dOxYBAQFYtWoVQkJCADgGdJw6dSoGDx6Mp59+2q1FtnQlJjvsANQyCToFgyR5jkou4aYwNfYVGLGft+cTEbloVOeVxYsXY+HChc5ABAAhISF46aWXePdZI1Tdih+qkbF1jTzu5nAtACCz1IIrlZ89IiJqZCgqLS1FQUFBteUFBQUoKyu74aJaEyHEr6GIl86oCYRq5OgQoAQAHCw0erkaIiLf0ahQdNddd2Hq1KlYv349zp8/j/Pnz+M///kPpk2bhrvvvtvdNbZoeouAxQ7IJSBQxbvOqGn0iXC0Fv1cZITFzvnQiIiARvYpWr58OZ555hk88MADsFgsjh0pFJg2bRr+8Y9/uLXAlq6qlShELYeMl86oiXQMVCJIJUOJ2Y5fLpuQFK65/kZERC1co5omdDod3nrrLRQVFTnvRLt8+TLeeust+Pn5ubvGFq3IeemMrUTUdGSShJsrg9D+woo65yAkImotbuiXODc3F7m5uejcuTP8/Pz4xdpAFVY7KqwCEhwtRURNqVeYBgoJuFRhw4Vyq7fLISLyukaFoqKiIgwfPhxdunTBnXfeidzcXADAtGnTeDt+A1yuHJsoUCWDQsZLZ9S0tAoZuoeqAQD7Cyq8XA0Rkfc1KhT96U9/glKpRE5ODnQ6nXP5fffdh02bNrmtuJbuspF3nZF3Vd2ef6LYDL2FcxoSUevWqI7Wmzdvxtdff422bdu6LO/cuTOys7PdUlhLZ5dkKK38EWJ/IvKWaJ0Cbf0UOF9uxaFCIwbF6K6/ERFRC9WoX+Py8nKXFqIqly9fhlqtvuGiWgOzyh+AYwJYDSeAJS+6ufL2/IOFFbDx9nwiasUa9Ws8ePBgvP/++87nkiTBbrfjlVdewbBhw9xWXEtmVjtCES+dkbclBqngp5BQbhU4WWL2djlERF7TqMtnr7zyCoYPH459+/bBbDbjueeew7Fjx3D58mV8//337q6xxZErVbCoHEMXhGoYisi75DIJyeEafJ9Xgf0FFegWwtZeImqdGtVS1LNnT5w8eRKDBg3CuHHjUF5ejrvvvhsHDx5EQkKCu2tscTr2vRVCJoNKBvhzAljyAcnhGsgAnC+3It/A2/OJqHVqcEuRxWLByJEjsXz5cvz1r3/1RE0tXvchIwE4xibiBLDkCwKUciQGq5BRbMaBwgqMahfg7ZKIiJpcg1uKlEolDh8+7IlaWgUBoFtlKOKlM/IlVR2uj102ocLK2/OJqPVp1OWzSZMm4b333nN3La1CGZQIiooF7HYEcwJY8iFt/RSI1MphFcDhIqO3yyEianKN6mhttVqxYsUKfPPNN+jTp0+1+c5effVVtxTXEl2CYygDlbkcMonzxJHvkCQJfcK1+OqcHgcLjbglUstJiomoVWlQKDpz5gzat2+Po0eP4uabbwYAnDx50mUd9pGpWwEclyhUZj2ASO8WQ3SN7qFqbLtYjmKzHWdKLegUpPJ2SURETaZBoahz587Izc3F9u3bATim9XjzzTcRFRXlkeJamhKzDWWSCnabDSpTubfLIapGKZOQFKbBj5cqcKCggqGIiFqVBnVqEcJ1tNuvvvoK5eX8ca+v05UD42X//CNkwublaohq1jtcAwA4U2Zxzs9HRNQa3FBP32tDEtVNAqARVmTs5KS55LtC1HIkBCoBAAcKK7xcDRFR02lQKJIkqVqfIfYhqr+bI7QYjIvY/dG73i6FqE59Km/PP3LZBLON//ghotahQX2KhBCYMmWKc9JXo9GIP/zhD9XuPlu/fr37Kmx
hJAA2q8XbZRDVqUOAEiFqGa6Y7Dh2xYje4Vpvl0RE5HENCkWpqakuzydNmuTWYojIN0iShJvDtdh6oRwHCoxIDtOwVZiIWrwGhaKVK1d6qg4i8jE3harxbW45Cow2nNNb0S5A6e2SiIg8ikMqE1GNNAoZeoQ47kTbzw7XRNQKMBQRUa1ujnCEopPFZpSaeXs+EbVsDEVEVKtIrQJx/goIAIcKOR8aEbVsPh+K2rdv7xwK4OrHzJkza1w/PT292roajaaJqyZqOfpU3nl2qMgIq5235xNRy9WoCWGb0k8//QSb7ddm+6NHj+L222/HhAkTat0mMDAQJ06ccD7nXTNEjdc5WIUApQxlFjtOFJvQI5T/yCCilsnnQ1FERITL85dffhkJCQkYMmRIrdtIkoTo6GhPl0bUKsglCcnhGnyXa8D+AiNDERG1WD5/+exqZrMZq1evxsMPP1xn649er0d8fDzi4uIwbtw4HDt2rAmrJGp5ksM0kEnARYMVuQYOPkpELVOzCkUbNmxAcXExpkyZUus6iYmJWLFiBT7//HOsXr0adrsdAwcOxPnz52vdxmQyobS01OVBRL/yU8rQNdgxkv2BAna4JqKWqVmFovfeew+jRo1CbGxsresMGDAAkydPRnJyMoYMGYL169cjIiICb7/9dq3bLFy4EEFBQc5HXFycJ8onatb6VN6e/8sVEwxWu5erISJyv2YTirKzs/HNN99g+vTpDdpOqVSid+/eOH36dK3rzJ49GyUlJc7HuXPnbrRcohYnVqdAlFYOmwAOF7G1iIhanmYTilauXInIyEiMHj26QdvZbDYcOXIEMTExta6jVqsRGBjo8iAiV5IkoU+E4/b8AwVG2AVvzyeilqVZhCK73Y6VK1ciNTUVCoXrDXOTJ0/G7Nmznc/nz5+PzZs348yZMzhw4AAmTZqE7OzsBrcwEVF13ULU0MgllFrsOF1i9nY5RERu5fO35APAN998g5ycHDz88MPVXsvJyYFM9mu2u3LlCmbMmIG8vDyEhISgT58+2L17N7p3796UJRO1SEqZhKQwDX64VIEDhUZ0qex8TUTUEjSLUHTHHXdA1NJUv2PHDpfnr732Gl577bUmqIqodeod7ghFWWUWFBqtCNc0i68RIqLrahaXz4jIdwSr5egUpALA2/OJqGVhKCKiBusT7rg9/+hlE0w23p5PRC0D272JWomMjAy37UsA0CEGBrsS32XmYUSX2scOIyJqLhiKiFq4okt5gCRh0qRJbt3vgPum43d/XohNx3LQWW1FfHw7t+6fiKipMRQRtXD6khJACKQtWIykW/q7bb92SYYimxXh7TvhZFE+4uPdtmsiIq9gKCJqJdp0SEBir2S37nP/mYsw6kKQgwC37peIyBvY0ZqIGk1TUQwAKIAWV0w27xZDRHSDGIqIqNEUNjNOfL8VkCTsK6jwdjlERDeEoYiIbsiu1csAOCaJNVp5ez4RNV8MRUR0Q07/sBP+wgyLHThUxMEciaj5YigiohsWjzIAwP4CI2y1TMlDROTrGIqI6IbFoBx+CgllFjuOXzF5uxwiokZhKCKiGyYDcHOEFgDw46WKWidwJiLyZQxFROQWvcM1UEhAfoUN58qt3i6HiKjBGIqIyC10Chl6hjomiv3xEm/PJ6Lmh6GIiNzmlkhHKDpdYsZlIwdzJKLmhaGIiNwmTKNAQqASAFuLiKj5YSgiIrfqH6UDABy5bITewsEciaj5YCgiIreK81MgVqeATYBTfxBRs8JQRERuJUkSfhPluD3/YIERRhtbi4ioeWAoIiK36xykQphGDpNd4FAhp/4gouaBoYiI3E6SJPSPdLQW7btkhNXOwRyJyPcxFBGRR/QIUSNAKYPeasexy5z6g4h8H0MREXmEXCbhlsrWor2XDLBz6g8i8nEMRUTkMclhGmjkEq6Y7DhZYvZ2OUREdWIoIiKPUckl3BzuGOV6bz4niiUi38ZQREQe1TdCC4UE5BmsOFtm8XY
5RES1YigiIo/SKWXoXdla9H2ega1FROSzGIqIyOP6R+mgkIAL5VZk69laRES+iaGIiDzOXylD0lWtRUREvoihiIiaxG8itZBLwDm9FTnsW0REPoihiIiaRIBKjl5hbC0iIt/FUERETeY3UVrIJCBbb8E59i0iIh/DUERETSZIJcdNoWoAwG62FhGRj2EoIqImNSBKBwnA2TILLpSztYiIfAdDERE1qWC1HD0rW4u+y2VrERH5Dp8ORXPnzoUkSS6Prl271rnNunXr0LVrV2g0Gtx000348ssvm6haIqqvW6N1kElAVpkF2WWcE42IfINPhyIA6NGjB3Jzc52PXbt21bru7t27MXHiREybNg0HDx7E+PHjMX78eBw9erQJKyai6wlWy5FceSfazosc5ZqIfIPPhyKFQoHo6GjnIzw8vNZ133jjDYwcORLPPvssunXrhgULFuDmm2/GkiVLmrBiIqqPgdGOUa4vGqw4XcrWIiLyPp8PRadOnUJsbCw6duyIBx98EDk5ObWuu2fPHowYMcJlWUpKCvbs2VPnMUwmE0pLS10eRORZ/koZ+kZqAQDfsrWIiHyAT4ei/v37Iz09HZs2bcKyZctw9uxZDB48GGVlZTWun5eXh6ioKJdlUVFRyMvLq/M4CxcuRFBQkPMRFxfntvdARLXrH6mFWi6hwGjDL1dM3i6HiFo5nw5Fo0aNwoQJE9CrVy+kpKTgyy+/RHFxMT755BO3Hmf27NkoKSlxPs6dO+fW/RNRzbQKGfpXthZ9l2uAja1FRORFCm8X0BDBwcHo0qULTp8+XePr0dHRyM/Pd1mWn5+P6OjoOverVquhVqvdVicR1V/fCC32FVSg2GzHkSITkisnjiUiamo+3VJ0Lb1ej8zMTMTExNT4+oABA7B161aXZVu2bMGAAQOaojwiagSVXMLAKB0AYFeeARY7W4uIyDt8OhQ988wz2LlzJ7KysrB7927cddddkMvlmDhxIgBg8uTJmD17tnP9J598Eps2bcLixYtx/PhxzJ07F/v27UNaWpq33gIR1UNyuAaBKhn0Fjt+yK/wdjlE1Er5dCg6f/48Jk6ciMTERNx7770ICwvD3r17ERERAQDIyclBbm6uc/2BAwdizZo1eOedd5CUlIRPP/0UGzZsQM+ePb31FoioHhQyCcNi/QAAP1wyoMxi83JFRNQa+XSfoo8//rjO13fs2FFt2YQJEzBhwgQPVUREntI1WIV9fgpcKLfi24sGjI4P8HZJRNTK+HRLERG1HpIk4bdtHK1FRy6bkGewerkiImptGIqIyGe08VOie4jjTtBtF8o5oCMRNSmGIiLyKUNiHdN/5OgtOFXC6T+IqOkwFBGRTwlSyXFL5YCO2y+Ww8Zb9ImoiTAUEZHP+U2UFn4KCVdMduwvNHq7HCJqJRiKiMjnqOUy3FZ5i/6uXAPKzLxFn4g8j6GIiHxSr1A12vgpYLYLbL1Q7u1yiKgVYCgiIp8kSRLuaOsPCcDxYjPOlrLTNRF5FkMREfmsKJ0CfSIcE8RuPq+HlZ2uiciDGIqIyKcNjtHBXynDFZMdezkvGhF5EEMREfk0tVyG4ZUjXe/JN+CKiZ2uicgzfHruMyJqHjIyMty+z/DwcLRr1w6AY160nwOUyCqzYPM5Pe5NCIQkSW4/JhG1bgxFRNRoRZfyAEnCpEmT3L5vnU6HjIwMtGvXztnp+r3jV3C2zIKjl024KUzj9mMSUevGUEREjaYvKQGEQNqCxUi6pb/b9pt9+gReSpuGwsJCZ2tRqEaOQdE67Mw14JsL5WgfoESASu62YxIRMRQR0Q1r0yEBib2SPX6c/lFanCwxI9dgxaZzevy+Iy+jEZH7sKM1ETUbMknC6Hb+kEtAZqkFRy6bvF0SEbUgDEVE1KyEaxUYHKMDAGw9X45STgFCRG7CUEREzU6/SC1idQqY7AKbcvQQgoM6EtGNYygiomZHJkkYHe+4jHamzIKfi3gZjYhuHEMRETVLYRo
Fbqu8jPbNeT0KK6xeroiImjuGIiJqtvpFatE+QAmrAD7PKoOFc6MR0Q1gKCKiZkuSJIyND4CfQkKB0Yat58u9XRIRNWMMRUTUrPkpZRjbPgAAcKjIiIwr7F9ERI3DUEREzV77ABUGRmkBAJty9CjmpLFE1AgMRUTUIgyK0aGtn+M2/Q1ZZbCyfxERNRBDERG1CDJJwu/aB0Ajl5BXOQ0Ixy8iooZgKCKiFiNQJce49gGQABy9bMKPlyq8XRIRNSMMRUTUonQIVGF4Gz8AwI6LBmSWmL1cERE1FwxFRNTi9InQIClMDQHgv1llKDJyYEciuj6FtwsgIqpNRkZGo7eNABCMSBTbNfjwlwL0Rx6UcPQxCg8PR7t27dxUZdPIyclBYWGhR/bdHM8HNQ1Pfe589TPHUEREPqfoUh4gSZg0adIN7ccvJBwzV28GYuLw3v5zWJl2H6wmI3Q6HTIyMnzyS7kmOTk56NatGwwGg0f239zOBzUNT37ufPUzx1BERD5HX1ICCIG0BYuRdEv/G9qXVW5Did2Gjn0GYtGODFw5sB0vzXwYhYWFPveFXJvCwkIYDAa8sOQ9xHdKdOu+s0+fwEtp05rV+aCm4anPnS9/5hiKiMhntemQgMReyTe8nxKzDb9cNsOs9kdo79sgSdKNF+cF8Z0S3XI+iBqiNX3u2NGaiFq8IJUcicEqAIBJE4Qxz/wdHMGIiK7FUERErUKoRo7OQUoAwMCJM3AGgV6uiIh8jU+HooULF+KWW25BQEAAIiMjMX78eJw4caLObdLT0yFJkstDo9E0UcVE5MsitQr4leUDADKlYOzKNXDUayJy8ulQtHPnTsycORN79+7Fli1bYLFYcMcdd6C8vLzO7QIDA5Gbm+t8ZGdnN1HFROTrtBXF2PSvlwAAu/IM2HahnMGIiAD4eEfrTZs2uTxPT09HZGQk9u/fj9tuu63W7SRJQnR0tKfLI6JmaufKN/Bk2h9xQgrFTwVGmO0CKXH+kDXTDthE5B4+3VJ0rZKSEgBAaGhonevp9XrEx8cjLi4O48aNw7Fjx+pc32QyobS01OVBRC1bPPS4s50/JAA/F5nwRVYZbGwxImrVmk0ostvteOqpp3DrrbeiZ8+eta6XmJiIFStW4PPPP8fq1atht9sxcOBAnD9/vtZtFi5ciKCgIOcjLi7OE2+BiHxMrzANxrUPgEwCMorN+DSzFEar3dtlEZGXNJtQNHPmTBw9ehQff/xxnesNGDAAkydPRnJyMoYMGYL169cjIiICb7/9dq3bzJ49GyUlJc7HuXPn3F0+EfmoriFq3NMhEAoJOFtmwfsnSzhXGlEr1SxCUVpaGv73v/9h+/btaNu2bYO2VSqV6N27N06fPl3rOmq1GoGBgS4PImo9EoJUmNQlGIFKGS6bbHj/RAkyS8zeLouImphPhyIhBNLS0vDZZ59h27Zt6NChQ4P3YbPZcOTIEcTExHigQiJqKaJ1CqQmBqOtnwImu8C6M6X4IZ+37BO1Jj4dimbOnInVq1djzZo1CAgIQF5eHvLy8lBRUeFcZ/LkyZg9e7bz+fz587F582acOXMGBw4cwKRJk5CdnY3p06d74y0QUTPip5RhYqcgJIWpAQDbLxqwLrMUegv7GRG1Bj4dipYtW4aSkhIMHToUMTExzsfatWud6+Tk5CA3N9f5/MqVK5gxYwa6deuGO++8E6Wlpdi9eze6d+/ujbdARM2MXCZhZJw/7mjrB4UEnCmz4L2MKzhRbPJ2aUTkYT49TlF9mq137Njh8vy1117Da6+95qGKiKg1kCQJN0do0c5fiS+yy5BfYcNnZ8vQM9SMEW39oJH79L8niaiR+P9sIqJahGsVmNwlGAOitJAAHL1swr9/KcaRIiP7GhG1QAxFRER1kMskDIn1wwOdgxCilkFvtWNjjh4fnCxBbrnF2+URkRsxFBER1UOcvxLTuoZgaKwOShlw0WDFqpMl2JhdhmKTzdvlEZEb+HSfIiIiX6K
QSfhNlA49QtXYccGAY1dMOHLZhKOXTegRqsaAKC3CNPxaJWqu+P9eIqIGClDKMbZ9AG6O0OC7XAOyyiw4WhmOEoNV6BepRaxOAYkTzBI1KwxFRESN1MZPifs7BSG33ILd+RU4VWLGiWLHI1wjR68wDXqGqKFTsqcCUXPAUEREdINi/JS4p6MSBRVW/HCpAsevmFBotGHbhXLsuFiOToEqJAar0DFQBa2CAYnIVzEUERG5SYRWgTHxARjR1g8ZV0w4XGRCrsGKkyVmnCwxQwLQ1l+BzkFqdAhQIlwj5yU2Ih/CUERE5GYauQy9w7XoHa7FpQorMq6YcLrEjAKjDef0VpzTWyvXk9DGT4E4fyXa+CkRqZVDzYEhibyGoYiIyIMitQpEahUYEuuHYpMNp0vNOF1ixoVyC4w2gcxSCzJLfx3vKFglQ4RWgUitHOEaBYJVMpg5egpRk2AoIiJqIsFqOfpGaNE3QgubELhksOJcuRXn9RZcLLdCb7Wj2GxHsdmMUyVXbSi1xYs7T+OKRo3jV0zQKGRQyySo5RJUcsf/KiTwUhzRDWIoIiLyArkkIcZPiRg/JfpFagEABqsdBRVWXKqw4VKFFVdMNhSb7dBb7NAGBMEGoMhkB0z2avuTSYBa9mtIcvnvyudyGUMTUV0YioiIfIROIUN8gArxAa7LfzpwEGN+fz/mvLcWYXEdYLTZYbIJmGwCZruAxQ7YBVBhE6iw1T4nm1yCS0hSySWY1AGI7twdNjAwETEUERH5ODkELp09CZW5HLF+1b+27UK4hCTnf9sETJXPbQKwCcBgFTBYrwpOQbF4cu1ObBUC+49dRphGjjCNAmEaOSK1ckRoFFCwhYlaCYYiIqJmTiZJ0CokaOv4RrfaXQNTVWi6XKpHaYURuqCQyv5MdpeO3zIA4Vo5onUKRGsViNYpEKFVQMmgRC0QQxERUSugkElQyCTorvnWP5GTg2dHDsLu/QfRpnMPFJmsKDTaUGS0Ic9ghdEmKvs42XAYJgCOoBSlU6CtnwJt/ZVo66eEH0ftphaAoYiIiKCGHe0ClGgXoHQuE0Kg1GJHnsGKPIMV+QYr8iqsMFgFcg1W5Bqs+KnACAAIUcvQ1k+Jtv5KxPsrEayWe+utEDUaQxEREdVIkiQEqeQIUsmRGKwG4AhKJWY7LpRbcL5yOIECow1XTHZcMZlw5LKjNSlYJUN8gBLtA1SI91dy/jdqFhiKiIio3iRJQrBajmC1HD1CHcuMVjsulFtxvtyCnMoxl4rNdhQXmfBzkSMkRWrliPd3hKQ4fyVUcvZJIt/DUERERDdEo5AhIUiFhCAVAMBks+Oc3oqsMjOyyxwtSVX9kn4qMEIGINZPgfgAJeIDVIjV8Q438g0MRUTUKmVkZHhkvyaTCWq12q379FStnj5Gl/BwjOjWDuUWO7L1FmSXmZFVZkGJ2e649FZuxfd5FVBIcPZFig9QIlqngOw6o3Pn5OSgsLDQ7TWHh4ejXbt2bt8v0Dxrbm0YioioVSm6lAdIEiZNmuSR/UuSBCFqH0DxRuj1erfv05PnQ6fTISMjA+3atUP3EDW6hzjCYrHJhuwyi6MlSW+BwSqQVWZBVpkFyHWMzN3WX+EYyNLfMVHu1VOY5OTkoFu3bjAYDB6t2Z18tWabcB2mwS4E7MIxGGgR1OjY91ZYlFqUWeyQ4Bg5XSFJUMhw3eDaHDEUEVGroi8pAYRA2oLFSLqlv1v3vXf7Zry3aL7b9121X6PR6LZ9VvHU+cg+fQIvpU1DYWFhtR/rqj5JSeEaCCFQaLQhR29BdpkF2XoLTNdMlKuVS2gXoEScvxJt/BS4VFgIg8GAF5a8h/hOiU1S840qbOKaLXaBErMNJSY7Si2O/y0x21BmscNoEzDaBEw2OyzVZ4z5lRSFGe9sQAmAw5V9w1xeBqCQAUrZryOkqyrn5NMoJGjlMihlzWtOPoYiImqV2nRIQGKvZLfuM/vUCY/su2q/nuSJ81EfkiQhQus
YELJPhBZ24RgXKbuyFemc3oIKm8CJYjNOFJsBADK0xYx3P0d410SEt4lBgFLWbDpux3dKdMt5tlYOxGlW+WHAfdNwAsHIPluKErMdpWab66jl9aCSOQKNTOYYh0ouSTAZK5B55gxi4jtCqVLDDkcrUtVMMgKAxe4IYAbUfDyZ5Ai1WoUMfgoJfkoZ7JLvDtfAUERERD5DJkmO0bN1CvSPclzeyS23IltvwQW9BRcMVphsMnTsMxAVAI5XBiWVTIKfUoKfQgZd5Y+vVi41q1aKKkI4pmUxXnVZ6+r57kw2AWfmCW6L3/35ZWQDQOW5qKKWSQhUyRCkliNIJUOQSo5ApQxahQS1XAbNVRMG13Qp7MCBs3h8wmC8u2mXS5Crqs8qBKyVoahqShmzs17Hwy6AcqtAudUGZ2+qiE74y+ZjOAMVbvbA+bsRDEVEROSz5JLkGDXb3zGopBAC3x08glnzF+HB5+ZBHhACg9UxhYnZJHDF9Ov1IBkAncLRSqG56pKOWi559bJO1Vx11wYJ01XTsNQxr6+TQgKE2Yifv/sGo4fdhk5tY5zhJ0glg0bhmbGhJEmCQgIUkIA6Gn3swhGMKqwCBqvdEY4sdhitdgSERwKi2CP13QiGIiIiajYkSYI/rNj/+Rr84bE/IrFDDKz2yh9di0C51Y5yqx0Gq6OVQm8V0Ftt1fcDQCX/tS+MQgaU+4Vh4MRHcBE6BJSYoJRJlZ2KK0MUpMoaHPsQwtFKYrELWEXl/1a2nDjCgB0Gqx0VVoFLiMSf/vM9isI7ojC/fn3DlDJUtuTIoJZJLi07armjrhOHT+DDZ6Zi1v79uDmyo7tOs1vIJAk6hWNqmbCr0tPxwz9jwbNP4KP097xYXc0YioiIqFlTyCQEquQIVP26TFS2UhisjnBitAlU2ASMVjvMdkd/mKpWGVTNf+sXjrHP/h1HARw9U+beIiUNIjt0cfa8kaEylF0VzFRy1+Ajb4aX/upDgsC5I/uhRfWw6m0MRURE1OJIkgStQoJWAVx7jcde2apjsgmY7YDZ5nheWFCAH3ftxNDbU6D2C4DVflXrj/i1d7FwHgNQVt6erpRVtSg5nqtljst2uso+TnnnsvD4ozPw7MLX0KVbNyik5nVXVmvBUERERK2KTKpqjXFdbjx7CR89PwPP3L4fNyfGufWYB85V4My+76GwmaHk6N0+izP0EREREYGhiIiIiAgAQxERERERAIYiIiIiIgAMRUREREQAmkkoWrp0Kdq3bw+NRoP+/fvjxx9/rHP9devWoWvXrtBoNLjpppvw5ZdfNlGlRERE1Fz5fChau3YtZs2ahTlz5uDAgQNISkpCSkoKLl26VOP6u3fvxsSJEzFt2jQcPHgQ48ePx/jx43H06NEmrpyIiIiaE58PRa+++ipmzJiBqVOnonv37li+fDl0Oh1WrFhR4/pvvPEGRo4ciWeffRbdunXDggULcPPNN2PJkiVNXDkRERE1Jz4disxmM/bv348RI0Y4l8lkMowYMQJ79uypcZs9e/a4rA8AKSkpta5PREREBPj4iNaFhYWw2WyIiopyWR4VFYXjx4/XuE1eXl6N6+fl5dV6HJPJBJPJ5HxeUlICACgtLW1s6bXS6/UAgJNHDqGivNxt+83OPAkAOJtxDH5ardv268l9s+am2Tdrbv77bo41nztzCgCwf/9+5/eeu5w4cQKA+79HWbMrT9es1+vd/jtbtT9RNS1LQwkfduHCBQFA7N6922X5s88+K/r161fjNkqlUqxZs8Zl2dKlS0VkZGStx5kzZ46AYzobPvjggw8++OCjmT/OnTvXqNzh0y1F4eHhkMvlyM/Pd1men5+P6OjoGreJjo5u0PoAMHv2bMyaNcv53G634/LlywgLC6s2YV9paSni4uJw7tw5BAYGNvQttUg8J9XxnFTHc1IznpfqeE6q4zmprqZzIoRAWVkZYmNjG7VPnw5FKpUKffr0wdatWzF+/HgAjsCydetWpKWl1bj
NgAEDsHXrVjz11FPOZVu2bMGAAQNqPY5arYZarXZZFhwcXGdtgYGB/GBeg+ekOp6T6nhOasbzUh3PSXU8J9Vde06CgoIavS+fDkUAMGvWLKSmpqJv377o168fXn/9dZSXl2Pq1KkAgMmTJ6NNmzZYuHAhAODJJ5/EkCFDsHjxYowePRoff/wx9u3bh3feecebb4OIiIh8nM+Hovvuuw8FBQV48cUXkZeXh+TkZGzatMnZmTonJwcy2a830Q0cOBBr1qzBCy+8gL/85S/o3LkzNmzYgJ49e3rrLRAREVEz4POhCADS0tJqvVy2Y8eOassmTJiACRMmeKQWtVqNOXPmVLvc1prxnFTHc1Idz0nNeF6q4zmpjuekOk+cE0mIxt63RkRERNRy+PTgjURERERNhaGIiIiICAxFRERERAAYioiIiIgAMBTVy9///ncMHDgQOp3uuoM6VhFC4MUXX0RMTAy0Wi1GjBiBU6dOebbQJnT58mU8+OCDCAwMRHBwMKZNm3bdeXeGDh0KSZJcHn/4wx+aqGL3W7p0Kdq3bw+NRoP+/fvjxx9/rHP9devWoWvXrtBoNLjpppvw5ZdfNlGlTach5yQ9Pb3a50Gj0TRhtZ737bffYuzYsYiNjYUkSdiwYcN1t9mxYwduvvlmqNVqdOrUCenp6R6vsyk19Jzs2LGj2udEkqQ657NsbhYuXIhbbrkFAQEBiIyMxPjx453zjtWlJX+nNOacuOM7haGoHsxmMyZMmIDHHnus3tu88sorePPNN7F8+XL88MMP8PPzQ0pKCoxGowcrbToPPvggjh07hi1btuB///sfvv32WzzyyCPX3W7GjBnIzc11Pl555ZUmqNb91q5di1mzZmHOnDk4cOAAkpKSkJKSgkuXLtW4/u7duzFx4kRMmzYNBw8exPjx4zF+/HgcPXq0iSv3nIaeE8AxEu3Vn4fs7OwmrNjzysvLkZSUhKVLl9Zr/bNnz2L06NEYNmwYDh06hKeeegrTp0/H119/7eFKm05Dz0mVEydOuHxWIiMjPVRh09u5cydmzpyJvXv3YsuWLbBYLLjjjjtQXsckrC39O6Ux5wRww3dKo2ZMa6VWrlwpgoKCrrue3W4X0dHR4h//+IdzWXFxsVCr1eKjjz7yYIVN45dffhEAxE8//eRc9tVXXwlJksSFCxdq3W7IkCHiySefbIIKPa9fv35i5syZzuc2m03ExsaKhQsX1rj+vffeK0aPHu2yrH///uLRRx/1aJ1NqaHnpL7/f2opAIjPPvusznWee+450aNHD5dl9913n0hJSfFgZd5Tn3Oyfft2AUBcuXKlSWryBZcuXRIAxM6dO2tdpzV8p1ytPufEHd8pbCnygLNnzyIvLw8jRoxwLgsKCkL//v2xZ88eL1bmHnv27EFwcDD69u3rXDZixAjIZDL88MMPdW774YcfIjw8HD179sTs2bNhMBg8Xa7bmc1m7N+/3+XvK5PJMGLEiFr/vnv27HFZHwBSUlJaxOcBaNw5AQC9Xo/4+HjExcVh3LhxOHbsWFOU67Na+ufkRiQnJyMmJga33347vv/+e2+X41ElJSUAgNDQ0FrXaW2flfqcE+DGv1MYijyg6lp31VQkVaKiolrEdfC8vLxqTdcKhQKhoaF1vr8HHngAq1evxvbt2zF79mx88MEHmDRpkqfLdbvCwkLYbLYG/X3z8vJa7OcBaNw5SUxMxIoVK/D5559j9erVsNvtGDhwIM6fP98UJfuk2j4npaWlqKio8FJV3hUTE4Ply5fjP//5D/7zn/8gLi4OQ4cOxYEDB7xdmkfY7XY89dRTuPXWW+ucnqqlf6dcrb7nxB3fKc1img9PeP7557Fo0aI618nIyEDXrl2bqCLvq+85aayr+xzddNNNiImJwfDhw5GZmYmEhIRG75eapwEDBmDAgAHO5wMHDkS3bt3w9ttvY8GCBV6sjHxJYmIiEhMTnc8HDhyIzMxMvPbaa/jggw+8WJlnzJw5E0e
PHsWuXbu8XYrPqO85ccd3SqsNRU8//TSmTJlS5zodO3Zs1L6jo6MBAPn5+YiJiXEuz8/PR3JycqP22RTqe06io6OrdZ61Wq24fPmy873XR//+/QEAp0+fblahKDw8HHK5HPn5+S7L8/Pza33/0dHRDVq/uWnMObmWUqlE7969cfr0aU+U2CzU9jkJDAyEVqv1UlW+p1+/fi0yNKSlpTlvXGnbtm2d67b075QqDTkn12rMd0qrvXwWERGBrl271vlQqVSN2neHDh0QHR2NrVu3OpeVlpbihx9+cEmxvqa+52TAgAEoLi7G/v37ndtu27YNdrvdGXTq49ChQwDgEhybA5VKhT59+rj8fe12O7Zu3Vrr33fAgAEu6wPAli1bfPrz0BCNOSfXstlsOHLkSLP7PLhTS/+cuMuhQ4da1OdECIG0tDR89tln2LZtGzp06HDdbVr6Z6Ux5+RajfpOuaFu2q1Edna2OHjwoJg3b57w9/cXBw8eFAcPHhRlZWXOdRITE8X69eudz19++WURHBwsPv/8c3H48GExbtw40aFDB1FRUeGNt+B2I0eOFL179xY//PCD2LVrl+jcubOYOHGi8/Xz58+LxMRE8cMPPwghhDh9+rSYP3++2Ldvnzh79qz4/PPPRceOHcVtt93mrbdwQz7++GOhVqtFenq6+OWXX8QjjzwigoODRV5enhBCiIceekg8//zzzvW///57oVAoxD//+U+RkZEh5syZI5RKpThy5Ii33oLbNfSczJs3T3z99dciMzNT7N+/X9x///1Co9GIY8eOeestuF1ZWZnz+wKAePXVV8XBgwdFdna2EEKI559/Xjz00EPO9c+cOSN0Op149tlnRUZGhli6dKmQy+Vi06ZN3noLbtfQc/Laa6+JDRs2iFOnTokjR46IJ598UshkMvHNN9946y243WOPPSaCgoLEjh07RG5urvNhMBic67S275TGnBN3fKcwFNVDamqqAFDtsX37duc6AMTKlSudz+12u/jb3/4moqKihFqtFsOHDxcnTpxo+uI9pKioSEycOFH4+/uLwMBAMXXqVJeQePbsWZdzlJOTI2677TYRGhoq1Gq16NSpk3j22WdFSUmJl97BjfvXv/4l2rVrJ1QqlejXr5/Yu3ev87UhQ4aI1NRUl/U/+eQT0aVLF6FSqUSPHj3Exo0bm7hiz2vIOXnqqaec60ZFRYk777xTHDhwwAtVe07V7eTXPqrOQ2pqqhgyZEi1bZKTk4VKpRIdO3Z0+V5pCRp6ThYtWiQSEhKERqMRoaGhYujQoWLbtm3eKd5Dajof1/6mtLbvlMacE3d8p0iVByciIiJq1VptnyIiIiKiqzEUEREREYGhiIiIiAgAQxERERERAIYiIiIiIgAMRUREREQAGIqIiIiIADAUERH5tKysLEiS5JwWh4g8h6GIiKjS3LlzfXrSZiLyLIYiIvIKi8Xi7RJ8ihACVqvV22UQtWoMRUStRNVlmGsfQ4cOBQBkZ2dj7NixCAkJgZ+fH3r06IEvv/zSuf2xY8cwZswYBAYGIiAgAIMHD0ZmZiYAwG63Y/78+Wjbti3UajWSk5OxadOmasdeu3YthgwZAo1Ggw8//BAA8O9//xvdunWDRqNB165d8dZbb9X5Pux2O1555RV06tQJarUa7dq1w9///nfn60eOHMFvf/tbaLVahIWF4ZFHHoFer3e+vmPHDvTr1w9+fn4IDg7GrbfeiuzsbKSnp2PevHn4+eefnecmPT29xhqmTJmC8ePHY968eYiIiEBgYCD+8Ic/wGw2u9S5cOFCdOjQAVqtFklJSfj0009d6pAkCV999RX69OkDtVqNXbt21fq+jx8/joEDB0Kj0aBnz57YuXOn87X09HQEBwe7rL9hwwZIkuR8/vPPP2PYsGEICAhAYGAg+vTpg3379tV5rolaG4W3CyCiphEXF4fc3Fzn87y8PIwYMQK33XYbAGDmzJkwm8349ttv4efnh19++QX+/v4
AgAsXLuC2227D0KFDsW3bNgQGBuL77793tmy88cYbWLx4Md5++2307t0bK1aswO9+9zscO3YMnTt3dh7z+eefx+LFi9G7d29nMHrxxRexZMkS9O7dGwcPHsSMGTPg5+eH1NTUGt/H7Nmz8e677+K1117DoEGDkJubi+PHjwMAysvLkZKSggEDBuCnn37CpUuXMH36dKSlpSE9PR1WqxXjx4/HjBkz8NFHH8FsNuPHH3+EJEm47777cPToUWzatAnffPMNACAoKKjW87l161ZoNBrs2LEDWVlZmDp1KsLCwpwBbeHChVi9ejWWL1+Ozp0749tvv8WkSZMQERGBIUOGuJyTf/7zn+jYsSNCQkJqPd6zzz6L119/Hd27d8err76KsWPH4uzZswgLC6v9j36VBx98EL1798ayZcsgl8tx6NAhKJXKem1L1Gq4YzZbImpeKioqRP/+/cWYMWOEzWYTQghx0003iblz59a4/uzZs0WHDh2E2Wyu8fXY2Fjx97//3WXZLbfcIv74xz8KIYQ4e/asACBef/11l3USEhLEmjVrXJYtWLBADBgwoMbjlJaWCrVaLd59990aX3/nnXdESEiI0Ov1zmUbN24UMplM5OXliaKiIgFA7Nixo8bt58yZI5KSkmp87WqpqakiNDRUlJeXO5ctW7ZM+Pv7C5vNJoxGo9DpdGL37t0u202bNk1MnDhRCPHrbPEbNmyo81hV5+7ll192LrNYLKJt27Zi0aJFQgghVq5cKYKCgly2++yzz8TVX/EBAQEiPT39uu+NqDVjSxFRK/Twww+jrKwMW7ZsgUzmuIr+xBNP4LHHHsPmzZsxYsQI3HPPPejVqxcA4NChQxg8eHCNLQulpaW4ePEibr31Vpflt956K37++WeXZX379nX+d3l5OTIzMzFt2jTMmDHDudxqtdbaQpORkQGTyYThw4fX+npSUhL8/Pxc6rDb7Thx4gRuu+02TJkyBSkpKbj99tsxYsQI3HvvvYiJianrdNUoKSkJOp3O+XzAgAHQ6/U4d+4c9Ho9DAYDbr/9dpdtzGYzevfu7bLs6nNSlwEDBjj/W6FQoG/fvsjIyKh3vbNmzcL06dPxwQcfYMSIEZgwYQISEhLqvT1Ra8A+RUStzEsvvYSvv/4a//3vfxEQEOBcPn36dJw5cwYPPfQQjhw5gr59++Jf//oXAECr1brl2FeHlap+Pu+++y4OHTrkfBw9ehR79+6tcXt31LFy5Urs2bMHAwcOxNq1a9GlS5daj9dYVe9t48aNLu/tl19+celXBLiek8aSyWQQQrgsu7Yj+9y5c3Hs2DGMHj0a27ZtQ/fu3fHZZ5/d8LGJWhKGIqJW5D//+Q/mz5+PTz75pMZWgri4OPzhD3/A+vXr8fTTT+Pdd98FAPTq1QvfffddjXeMBQYGIjY2Ft9//73L8u+//x7du3evtZaoqCjExsbizJkz6NSpk8ujQ4cONW7TuXNnaLVabN26tcbXu3Xrhp9//hnl5eUudchkMiQmJjqX9e7dG7Nnz8bu3bvRs2dPrFmzBgCgUqlgs9lqrflqP//8MyoqKpzP9+7dC39/f8TFxaF79+5Qq9XIycmp9t7i4uLqtf9rXR3crFYr9u/fj27dugEAIiIiUFZW5vK+axrXqEuXLvjTn/6EzZs34+6778bKlSsbVQtRi+Xt63dE1DSOHDkidDqdeOGFF0Rubq7zUVRUJIQQ4sknnxSbNm0SZ86cEfv37xf9+/cX9957rxBCiMLCQhEWFibuvvtu8dNPP4mTJ0+K999/Xxw/flwIIcRrr70mAgMDxccffyyOHz8u/vznPwulUilOnjwphPi1X8zBgwddanr33XeFVqsVb7zxhjhx4oQ4fPiwWLFihVi8eHGt72Pu3LkiJCRErFq1Spw+fVrs2bNH/Pvf/xZCCFFeXi5iYmLEPffcI44cOSK2bdsmOnbsKFJTU4UQQpw5c0Y8//zzYvfu3SIrK0t8/fXXIiwsTLz
11ltCCCE+/PBD4efnJw4ePCgKCgqE0WissYbU1FTh7+8vJk6cKI4dOyY2btwooqKixPPPP+9c569//asICwsT6enp4vTp02L//v3izTffdPbrqepTdOXKlTr/blXnrl27dmL9+vUiIyNDPPLII8Lf318UFBQIIYQoKioSfn5+4oknnhCnT58WH374oYiNjXX2KTIYDGLmzJli+/btIisrS+zatUskJCSI5557rs5jE7U2DEVErcTKlSsFgGqPIUOGCCGESEtLEwkJCUKtVouIiAjx0EMPicLCQuf2P//8s7jjjjuETqcTAQEBYvDgwSIzM1MIIYTNZhNz584Vbdq0EUqlUiQlJYmvvvrKuW1toUgIRxBJTk4WKpVKhISEiNtuu02sX7++1vdhs9nESy+9JOLj44VSqRTt2rUT//d//+d8/fDhw2LYsGFCo9GI0NBQMWPGDFFWViaEECIvL0+MHz9exMTECJVKJeLj48WLL77o7GxuNBrFPffcI4KDgwUAsXLlyhprSE1NFePGjRMvvviiCAsLE/7+/mLGjBkuIcput4vXX39dJCYmCqVSKSIiIkRKSorYuXOnEKLhoWjNmjWiX79+QqVSie7du4tt27a5rPfZZ5+JTp06Ca1WK8aMGSPeeecdZygymUzi/vvvF3FxcUKlUonY2FiRlpYmKioq6jw2UWsjCXHNhWgiIqrTlClTUFxcjA0bNni7FCJyI/YpIiIiIgJDEREREREAgJfPiIiIiMCWIiIiIiIADEVEREREABiKiIiIiAAwFBEREREBYCgiIiIiAsBQRERERASAoYiIiIgIAEMREREREQCGIiIiIiIAwP8DVqr/4VVPbjIAAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -778,26 +1780,18 @@ } ], "source": [ - "#distribution curve of zscore\n", - "sns.histplot(filtered['zscore_cost_per_bus'], kde=True, color='skyblue', bins=20)\n", - "#plt.axvline(mean2, color='red', linestyle='dashed', linewidth=2, label=f'Mean: {mean2:.2f}')\n", - "#plt.axvline(mean2 + std_dev2, color='green', linestyle='dashed', linewidth=2, label=f'Standard Deviation: {std_dev2:,.2f}')\n", - "#plt.axvline(mean2 - std_dev2, color='green', linestyle='dashed', linewidth=2)\n", - "#plt.axvline(mean2 + (std_dev2*2), color='green', linestyle='dashed', linewidth=2)\n", - "#plt.axvline(mean2 + (std_dev2*3), color='green', linestyle='dashed', linewidth=2)\n", - "#plt.axvline(mean2 - (std_dev2*2), color='green', linestyle='dashed', linewidth=2)\n", - "#plt.axvline(mean2 - (std_dev2*3), color='green', linestyle='dashed', linewidth=2)\n", - "\n", - "plt.title('Cost Per Bus Z-Score Distribution')\n", - "plt.xlabel('zscore cost per bus')\n", - "plt.ylabel('Frequency')\n", + "# distribution curve of zscore\n", + "sns.histplot(zscore_bus[\"zscore_cost_per_bus\"], kde=True, color=\"skyblue\", bins=20)\n", + "plt.title(\"Cost Per Bus Z-Score Distribution\")\n", + "plt.xlabel(\"zscore cost per bus\")\n", + "plt.ylabel(\"Frequency\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, - "id": "7776e9cc-e750-4b8e-8352-a43bd448e7b4", + "id": "7dc778c3-7441-44a2-a8d4-fc01894a1b61", "metadata": {}, "outputs": [], "source": [] diff --git a/bus_procurement_cost/tircp_bus_analysis.ipynb b/bus_procurement_cost/tircp_bus_analysis.ipynb index 4a116d6d2..aec30ac1a 100644 --- a/bus_procurement_cost/tircp_bus_analysis.ipynb +++ b/bus_procurement_cost/tircp_bus_analysis.ipynb @@ -1,13 +1,5 @@ { "cells": [ - { - "cell_type": "markdown", - "id": "e767add0-6871-46de-b480-d77ae7d62819", - "metadata": {}, - "source": [ - "## Intro" - ] - }, { "cell_type": "code", "execution_count": 1, @@ -23,6 +15,17 @@ "pd.set_option(\"display.max_rows\", 200)" ] }, + { + "cell_type": 
"markdown", + "id": "9f3cd0cb-98d8-43a4-be8d-b7b41f80dd75", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## AGREEMENT ALLOCATIONS SHEET DATA" + ] + }, { "cell_type": "markdown", "id": "52794293-2f66-47a7-b580-6170fc72ca94", @@ -31,7 +34,7 @@ "tags": [] }, "source": [ - "## Agreement Allocations - Read in Raw data" + "### Agreement Allocations - Read in Raw data" ] }, { @@ -59,10 +62,11 @@ "cell_type": "markdown", "id": "79002d63-c2a1-4218-81a2-31e6a10d981e", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## Agreement Allocations -Data Cleaning and QC" + "### Agreement Allocations -Data Cleaning and QC" ] }, { @@ -222,7 +226,7 @@ "metadata": {}, "outputs": [], "source": [ - "#see that some rows were consolidated\n", + "# see that some rows were consolidated\n", "display(tircp.grant_recipient.nunique())" ] }, @@ -234,7 +238,7 @@ "tags": [] }, "source": [ - "## Agreement Allocations-Export Cleaned data" + "### Agreement Allocations-Export Cleaned data" ] }, { @@ -257,7 +261,7 @@ "tags": [] }, "source": [ - "## Agreement Allocations-Read in Cleaned data from GCS" + "### Agreement Allocations-Read in Cleaned data from GCS" ] }, { @@ -290,7 +294,7 @@ "tags": [] }, "source": [ - "## Agreement Allocations-Cost per Bus, per agency" + "### Agreement Allocations-Cost per Bus, per agency" ] }, { @@ -301,9 +305,9 @@ "outputs": [], "source": [ "# filer to project with bus count values\n", - "# caveat: some rows in \"component\" column state some variation of \"purchased buses\", but did not specify the amount of buses. 
\n", + "# caveat: some rows in \"component\" column state some variation of \"purchased buses\", but did not specify the amount of buses.\n", "# only rows stating the specificy number of buses purchased are included\n", - "only_bus = tircp[tircp['#_of_buses']>0]\n" + "only_bus = tircp[tircp[\"#_of_buses\"] > 0]" ] }, { @@ -323,11 +327,12 @@ "metadata": {}, "outputs": [], "source": [ - "#aggregate # of buses and allocation by transit agency\n", - "bus_cost = only_bus.groupby('grant_recipient').agg({\n", - " '#_of_buses':\"sum\",\n", - " 'allocation_amount':'sum'\n", - "}).reset_index()" + "# aggregate # of buses and allocation by transit agency\n", + "bus_cost = (\n", + " only_bus.groupby(\"grant_recipient\")\n", + " .agg({\"#_of_buses\": \"sum\", \"allocation_amount\": \"sum\"})\n", + " .reset_index()\n", + ")" ] }, { @@ -347,7 +352,9 @@ "metadata": {}, "outputs": [], "source": [ - "bus_cost['cost_per_bus']= ((bus_cost['allocation_amount'])/(bus_cost['#_of_buses'])).astype('int64')" + "bus_cost[\"cost_per_bus\"] = (\n", + " (bus_cost[\"allocation_amount\"]) / (bus_cost[\"#_of_buses\"])\n", + ").astype(\"int64\")" ] }, { @@ -357,7 +364,7 @@ "metadata": {}, "outputs": [], "source": [ - "display(bus_cost.dtypes,bus_cost)" + "display(bus_cost.dtypes, bus_cost)" ] }, { @@ -367,8 +374,10 @@ "metadata": {}, "outputs": [], "source": [ - "#exporting cost per bus\n", - "bus_cost.to_csv(\"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_allocation_cost_per_bus.csv\")" + "# exporting cost per bus\n", + "bus_cost.to_csv(\n", + " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_allocation_cost_per_bus.csv\"\n", + ")" ] }, { @@ -379,7 +388,7 @@ "tags": [] }, "source": [ - "## Agreement Allocations - Stat analysis" + "### Agreement Allocations - Stat analysis" ] }, { @@ -412,6 +421,16 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "id": "1de41d29-4183-41a3-b6af-58e1d676f788", + "metadata": { + "tags": [] + }, + "source": [ + "## 
PROJECT TRACKING SHEET DATA" + ] + }, { "cell_type": "markdown", "id": "b29ced72-1fd8-40aa-95c0-178a4db4a36b", @@ -420,7 +439,7 @@ "tags": [] }, "source": [ - "## project tracking - read raw data\n" + "### project tracking - read raw data\n" ] }, { @@ -476,7 +495,7 @@ "tags": [] }, "source": [ - "### initial data cleaning" + "### data frame cleaning" ] }, { @@ -486,8 +505,8 @@ "metadata": {}, "outputs": [], "source": [ - "#drop columns\n", - "#tircp = tircp.iloc[:, :12]\n", + "# only keep first couple of columns\n", + "# tircp = tircp.iloc[:, :12]\n", "project = project.iloc[:, :20]" ] }, @@ -508,14 +527,16 @@ "metadata": {}, "outputs": [], "source": [ - "drop_col=[\n", - " 'Master Agreement Expiration Date',\n", - " 'Project Manager',\n", - " 'Regional Coordinator',\n", - " 'Technical Assistance-CALITP (Y/N)',\n", - " 'Technical Assistance-Fleet (Y/N)',\n", - " 'Technical Assistance-Network Integration (Y/N)',\n", - " 'Technical Assistance-Priority Population (Y/N)',]" + "# drop specific columns\n", + "drop_col = [\n", + " \"Master Agreement Expiration Date\",\n", + " \"Project Manager\",\n", + " \"Regional Coordinator\",\n", + " \"Technical Assistance-CALITP (Y/N)\",\n", + " \"Technical Assistance-Fleet (Y/N)\",\n", + " \"Technical Assistance-Network Integration (Y/N)\",\n", + " \"Technical Assistance-Priority Population (Y/N)\",\n", + "]" ] }, { @@ -535,7 +556,7 @@ "metadata": {}, "outputs": [], "source": [ - "list(project.columns)" + "len(project.columns)" ] }, { @@ -545,557 +566,2111 @@ "metadata": {}, "outputs": [], "source": [ - "#replace space with _\n", - "project.columns =project.columns.str.replace(' ','_')" + "# replace space with _ & lower everything\n", + "project.columns = project.columns.str.replace(\" \", \"_\")\n", + "project.columns = project.columns.str.lower()" ] }, { "cell_type": "code", "execution_count": null, - "id": "a0c1d453-9ef3-4ad6-9667-9131c3bc6d72", + "id": "4937d3c4-f5df-4a42-84ea-161ec61e637b", "metadata": {}, "outputs": [], 
"source": [ - "#lower case everything\n", - "project.columns=project.columns.str.lower()" + "# check work\n", + "project.columns" + ] + }, + { + "cell_type": "markdown", + "id": "4372f730-9137-4e33-a45e-6563abdf4085", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "### check columns\n", + "check values of all columns to see if:\n", + "-any duplicates values\n", + "-invalid int/str values\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "4937d3c4-f5df-4a42-84ea-161ec61e637b", + "id": "83152dd0-e2a4-4892-9019-ceb28ce00f5f", "metadata": {}, "outputs": [], "source": [ - "#check work\n", "project.columns" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5f5a0d4-8616-44cf-82ab-08dcc0bc58c7", + "metadata": {}, + "outputs": [], + "source": [ + "# function to check column information\n", + "\n", + "\n", + "def col_checker(col):\n", + " display(\n", + " f\"Displaying column: {col}\",\n", + " len(project[col]),\n", + " list(project[col].sort_values(ascending=True).unique()),\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24b1a759-280a-4a2b-9900-327cdf073c59", + "metadata": {}, + "outputs": [], + "source": [ + "# col is OK, all numbers\n", + "col_checker(\"tircp_award_amount_($)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f912f6d4-4b1e-4868-9fe9-4e4c85124e9a", + "metadata": {}, + "outputs": [], + "source": [ + "# col is good, everything is a number\n", + "col_checker(\"total_project_cost\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a584257f-455a-4939-9897-085e1c7f95f1", + "metadata": {}, + "outputs": [], + "source": [ + "# col is OK\n", + "col_checker(\"master_agreement_number\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d39123d-1e8b-458e-8b18-8113666de00f", + "metadata": {}, + "outputs": [], + "source": [ + "# col is OK\n", + "col_checker(\"bus_count\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "022bba0f-73c7-4ae2-9986-ae234dd5517b", + "metadata": {}, + "outputs": [], + "source": [ + "# column is OK\n", + "col_checker(\"project_description\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d77f19e7-87cb-4ae0-acd0-829addb935b6", + "metadata": {}, + "outputs": [], + "source": [ + "project[project[\"district\"] == \"VAR\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1526686-6a92-4567-92df-0cf27c25ff01", + "metadata": {}, + "outputs": [], + "source": [ + "# Project title OK,\n", + "col_checker(\"project_title\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dc1361f-6d1e-4067-8cf3-738f4fe9ad85", + "metadata": {}, + "outputs": [], + "source": [ + "# award year OK\n", + "col_checker(\"award_year\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e50bb102-bdab-45f2-af8e-ccca23f25247", + "metadata": {}, + "outputs": [], + "source": [ + "# project num OK\n", + "col_checker(\"project_#\")" + ] + }, { "cell_type": "markdown", - "id": "51f95400-1198-46a6-a41a-fef99b3a2ffa", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, + "id": "92513e2d-8c76-4150-b4d0-f2034f3ce85c", + "metadata": {}, "source": [ - "### filter df for project descriptions that contain bus" + "---" ] }, { "cell_type": "code", "execution_count": null, - "id": "fc9ee142-13b2-4fc2-86af-d712ab5df6c4", + "id": "94f27bed-11ce-480d-bcf5-5c2ed07b03a8", "metadata": {}, "outputs": [], "source": [ - "bus_only = project[project['bus_count']>0]" + "# DROP COL\n", + "# Col is OK\n", + "col_checker(\"allocated_amount\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "c65b773e-dd41-4c95-8e6b-2132e5d7e978", + "id": "55860953-f885-4e1d-ae02-f4d8d6be46bf", + "metadata": {}, + "outputs": [], + "source": [ + "# NEEDS CLEANING grant_recipient need to clean\n", + "col_checker(\"grant_recipient\")" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "c74990d4-bda0-4615-9494-832f7b44f3f3", + "metadata": {}, + "outputs": [], + "source": [ + "# may need to clean, there are rows that say '3, 4'\n", + "col_checker(\"county\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00cc4687-19d7-4af3-81f9-37e66cd6cb33", + "metadata": {}, + "outputs": [], + "source": [ + "# Move to cleaning, check what is 'VAR'. various?\n", + "# may be ok just check to make sure\n", + "project.district.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e71ef09e-c7e5-4cdb-8e48-1be0ad1fa087", "metadata": {}, "outputs": [], "source": [ - "#this looks correct\n", - "display(project.shape,\n", - " bus_only.shape)" + "# couldnt run col_checker, guessing because some PPNO numbers are inconsistent\n", + "# may need to clean, there is a ppno of CP052/CP053\n", + "project.ppno.unique()" ] }, { "cell_type": "markdown", - "id": "1b95f567-13fc-4184-8b09-f512f702f3f0", + "id": "3f4e1191-979e-4a31-bce3-296fabc586dc", "metadata": { "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "### export project- bus only df" + "### dropping allocated amount column" ] }, { "cell_type": "code", "execution_count": null, - "id": "cecea030-b37a-4170-9997-656e8bd0c080", + "id": "209b153b-f42f-4a57-97c1-087e403fec55", + "metadata": {}, + "outputs": [], + "source": [ + "# dropping allocated amount column\n", + "project.drop(columns=[\"allocated_amount\"], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16a7479b-707a-4621-906c-022598a64179", "metadata": {}, "outputs": [], "source": [ - "bus_only.to_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_bus_only.csv')" + "# checking work\n", + "project.columns" ] }, { "cell_type": "markdown", - "id": "1ea926da-8d76-481c-a5fb-ef39606f45ca", + "id": "7e8652ea-a95f-4009-b592-1414da775c61", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] 
}, "source": [ - "### Read in project bus only data\n" + "### Clean `grant_recipient` column" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "abaf10b2-0dc3-432d-845a-8dacb2af806f", + "execution_count": null, + "id": "77b1e0be-20ee-454e-8e8b-a6c0e0951d44", "metadata": {}, "outputs": [], "source": [ - "bus_only= pd.read_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_bus_only.csv')" + "list(project.grant_recipient.sort_values(ascending=True).unique())" ] }, { "cell_type": "code", "execution_count": null, - "id": "ab4b7cf3-183e-4ea1-baca-e3f5d0bd9dd6", + "id": "e98893df-a0f5-4fbd-af6a-8fb58602a556", + "metadata": {}, + "outputs": [], + "source": [ + "new_dict = {\n", + " \"Antelope Valley Transit Authority \": \"Antelope Valley Transit Authority (AVTA)\",\n", + " \"Humboldt Transit Authority\": \"Humboldt Transit Authority (HTA)\",\n", + " \"Orange County Transportation Authority\": \"Orange County Transportation Authority (OCTA)\",\n", + " \"Capitol Corridor Joint Powers Authority\": \"Capitol Corridor Joint Powers Authority (CCJPA)\",\n", + " \"Los Angeles County Metropolitan Transportation Authority\": \"Los Angeles County Metropolitan Transportation Authority (LA Metro)\",\n", + " \"Monterey-Salinas Transit\": \"Monterey-Salinas Transit District (MST)\",\n", + " \"Sacramento Regional Transit (SacRT)\": \"Sacramento Regional Transit District (SacRT)\",\n", + " \"Sacramento Regional Transit District\": \"Sacramento Regional Transit District (SacRT)\",\n", + " \"Sacramento Regional Transit District (SacRT) \": \"Sacramento Regional Transit District (SacRT)\",\n", + " \"San Diego Association of Governments\": \"San Diego Association of Governments (SANDAG)\",\n", + " \"Santa Clara Valley Transportation Authority (SCVTA)\": \"Santa Clara Valley Transportation Authority (VTA)\",\n", + " \"Southern California Regional Rail Authority (SCRRA)\": \"Southern California Regional Rail Authority (SCRRA - Metrolink)\",\n", + " 
\"Southern California Regional Rail Authority\": \"Southern California Regional Rail Authority (SCRRA - Metrolink)\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41eedd51-e052-4462-bf8f-e14bea1df7cc", "metadata": {}, "outputs": [], "source": [ - "display(bus_only.shape,\n", - " bus_only.columns,\n", - " bus_only.head())" + "# df.replace({'bus_desc': new_dict}, inplace=True)\n", + "project.replace({\"grant_recipient\": new_dict}, inplace=True)" ] }, { "cell_type": "code", "execution_count": null, - "id": "aab862f1-7aa2-47ea-a5bc-52483b097c1e", + "id": "195fb532-8154-4ca1-ae6a-81b16d6f031e", "metadata": {}, "outputs": [], "source": [ - "#inspect columns values.\n", - "list(bus_only['grant_recipient'].sort_values().unique())\n", - "#everything looks good" + "# check work. looks good\n", + "list(project[\"grant_recipient\"].sort_values().unique())" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "4acd9422-7cc0-4082-a87a-628187d1736f", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", - "id": "02cce57d-f82c-4f85-be53-814538b6b6c3", + "id": "15660296-1348-4463-a962-ac22234c2f7e", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "### Consolidate up grant recipient name" + "### Cleaning `county` column" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "e98893df-a0f5-4fbd-af6a-8fb58602a556", + "execution_count": null, + "id": "13635af7-3952-42f4-a272-c7a462ea1358", "metadata": {}, "outputs": [], "source": [ - "new_dict ={\n", - " 'Antelope Valley Transit Authority ':'Antelope Valley Transit Authority (AVTA)',\n", - " 'Humboldt Transit Authority':'Humboldt Transit Authority (HTA)',\n", - " 'Orange County Transportation Authority':'Orange County Transportation Authority (OCTA)',\n", - "}" + "col_checker(\"county\")" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "41eedd51-e052-4462-bf8f-e14bea1df7cc", + "execution_count": null, + 
"id": "4e3f6614-dc19-4011-a878-fcc0e8570b41", "metadata": {}, "outputs": [], "source": [ - "#df.replace({'bus_desc': new_dict}, inplace=True)\n", - "bus_only.replace({'grant_recipient': new_dict}, inplace=True)" + "project[project[\"county\"] == \"3, 4\"]" ] }, { "cell_type": "code", "execution_count": null, - "id": "195fb532-8154-4ca1-ae6a-81b16d6f031e", + "id": "1de28ea8-d063-4797-8111-2cf073e71e71", + "metadata": {}, + "outputs": [], + "source": [ + "# change county value from '3, 4' to 'VAR' like the other rows.\n", + "project.at[3, \"county\"] = \"VAR\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23170f02-1d66-4bb3-917c-9ca2ceaba627", "metadata": {}, "outputs": [], "source": [ - "list(bus_only['grant_recipient'].sort_values().unique())" + "# check work\n", + "project.iloc[3]" ] }, { "cell_type": "markdown", - "id": "a5d0e920-cfd9-465f-89eb-81214b27070a", + "id": "52ae834d-72a2-4c51-8b78-a533a807d497", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "### aggregate up" + "### Cleaning `district`column\n", + "This is good as is, no cleaning requried. All rows with VAR district has VAR in county as well." 
] }, { "cell_type": "code", - "execution_count": 5, - "id": "a0a396f0-c9ad-48bd-9767-45b5a8b53d25", + "execution_count": null, + "id": "39b424d0-7d59-4d7b-88f2-ffc5b80ae9c8", "metadata": {}, "outputs": [], "source": [ - "#aggregate # of buses and allocation by transit agency\n", - "#bus_cost = only_bus.groupby('grant_recipient').agg({\n", - "# '#_of_buses':\"sum\",\n", - "# 'allocation_amount':'sum'\n", - "#}).reset_index()\n", - "\n", - "bus_cost = bus_only.groupby('grant_recipient').agg({\n", - " 'bus_count':'sum',\n", - " 'tircp_award_amount_($)': 'sum'\n", - "}).reset_index()" + "project.district.unique()" ] }, { "cell_type": "code", "execution_count": null, - "id": "c5c8dfa3-310f-47bd-8d95-2eed37feec73", + "id": "3ad49c6e-c13a-4460-b214-8ddac6e24f29", "metadata": {}, "outputs": [], "source": [ - "#confirm aggregation worked\n", - "bus_cost" + "project[project[\"district\"] == \"VAR\"]" ] }, { "cell_type": "markdown", - "id": "0f074183-9110-41fc-820b-4483fe9b076b", + "id": "b7be5cfa-b593-4c26-849a-2d9c2f777f34", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## create new cost per bus column" + "### Clean `ppno` column\n", + "This should all be fine as is, no cleaning needed" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "391fdd1a-585b-43e4-b70b-18c7f54a8263", + "execution_count": null, + "id": "3611650f-aa4d-4cf8-8153-4c809c0e7240", "metadata": {}, "outputs": [], "source": [ - "bus_cost['cost_per_bus']= (bus_cost['tircp_award_amount_($)']/bus_cost['bus_count']).astype('int64')" + "list(project.ppno.unique())" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "3219820e-0d80-4c1b-92c5-f098f09a22a9", + "execution_count": null, + "id": "8c92f80b-5898-4ff7-b184-cdb804ff564a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", + "outputs": [], + "source": [ + "project[project[\"ppno\"] == \"CP052/CP053\"]" + ] + }, + { + "cell_type": "markdown", + "id": "7f8dd43f-ebef-4a75-aa2f-63b1faf6f514", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "### Skim the project description column?\n", + "double check to ensure bus count is accurate to what the description says?\n", + "\n", + "Saw that some rows mention procuring both zero and non-zero emission buses (count total buses in `bus count` and `VAR` in prop type and bus size?\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "163a339e-7564-455e-9477-b9df0914ef0c", + "metadata": {}, + "outputs": [], + "source": [ + "project[\n", + " project[\"project_title\"]\n", + " == \"ATN FAST (Family of Advanced Solutions for Transit): Revolutionizing Transit for a Global Audience\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6c9992f-8a4f-403d-8bc8-f45b55575402", + "metadata": {}, + "outputs": [], + "source": [ + "# iloc check\n", + "project.iloc[73]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74702a2f-0b82-47ae-81ae-f7c021394ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# code to update value at specific index and column\n", + "project.loc[73, \"bus_count\"] = 42\n", + "\n", + "# check work\n", + "project.iloc[73]" + ] + }, + { + "cell_type": "markdown", + "id": "1c6f0650-9ef5-46cd-a2b7-63265c793780", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "id": "5140da93-1ee3-4bd8-8cd4-01745947ff48", + "metadata": {}, + "source": [ + "## Export cleaned Project df " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55872945-c6fa-4a98-b674-91f91d39d08f", + "metadata": {}, + "outputs": [], + "source": [ + "# exproject cleaned project df\n", + "project.to_csv(\n", + " 
\"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_clean.csv\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a931b907-8431-4290-96dc-2e1b40b6e64f", + "metadata": { + "tags": [] + }, + "source": [ + "## Read in cleaned project data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9b12c971-94d5-4ef8-93d3-2994df1826d3", + "metadata": {}, + "outputs": [], + "source": [ + "project = pd.read_csv(\n", + " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_clean.csv\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d5c9f3d1-9437-48a3-a37f-0d74860a1499", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(124, 14)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Index(['Unnamed: 0.1', 'Unnamed: 0', 'award_year', 'project_#',\n", + " 'grant_recipient', 'project_title', 'ppno', 'district', 'county',\n", + " 'project_description', 'bus_count', 'master_agreement_number',\n", + " 'total_project_cost', 'tircp_award_amount_($)'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# ensure df is able to read in\n", + "display(project.shape, project.columns)" + ] + }, + { + "cell_type": "markdown", + "id": "51f95400-1198-46a6-a41a-fef99b3a2ffa", + "metadata": { + "tags": [] + }, + "source": [ + "### filter df for project descriptions that contain bus" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fc9ee142-13b2-4fc2-86af-d712ab5df6c4", + "metadata": {}, + "outputs": [], + "source": [ + "bus_only = project[project[\"bus_count\"] > 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c65b773e-dd41-4c95-8e6b-2132e5d7e978", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(124, 14)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ 
+ "(37, 14)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# this looks correct\n", + "display(project.shape, bus_only.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "fdd09938-88b1-4fcd-8159-1637a57ee0f4", + "metadata": { + "tags": [] + }, + "source": [ + "## New column for propulsion type - `prop_type`\n", + "Use on `bus_only` df" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "630e1662-9a32-4a2c-8174-1a0dc59ad42e", + "metadata": {}, + "outputs": [], + "source": [ + "prop_type = [\n", + " \"electric buses\",\n", + " \"electric commuter\",\n", + " \"Electric Buses\",\n", + " \"battery electric\",\n", + " \"Batery Electric\",\n", + " \"battery-electric\",\n", + " \"fuel-cell\",\n", + " \"fuel cell\",\n", + " \"Fuel Cell\",\n", + " \"zero emission\",\n", + " \"Zero Emission\",\n", + " \"zero-emission electric buses\",\n", + " \"zero-emission buses\",\n", + " \"zero‐emission\",\n", + " \"zero-emission\",\n", + " \"zeroemission\",\n", + " \"CNG\",\n", + " \"cng\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "9cb544f0-81e3-4644-a4f8-5b83009e3e18", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(prop_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "fa958cba-0e59-464b-959f-c4c18b61f8cc", + "metadata": {}, + "outputs": [], + "source": [ + "# function to match keywords to list\n", + "def prop_type_finder(description):\n", + " for keyword in prop_type:\n", + " if keyword in description:\n", + " return keyword\n", + " return \"not specified\"" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "6c9f56c8-ee70-426c-ba62-2294b2b13fa7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_469/3402581703.py:2: 
SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " bus_only['prop_type'] = bus_only['project_description'].apply(prop_type_finder)\n" + ] + } + ], + "source": [ + "# add new col `prop_type`, fill it with values based on project_description using prop_type_finder function\n", + "bus_only[\"prop_type\"] = bus_only[\"project_description\"].apply(prop_type_finder)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "c35ceca0-1049-4c1f-b24c-569c45f97f5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Unnamed: 0.1', 'Unnamed: 0', 'award_year', 'project_#',\n", + " 'grant_recipient', 'project_title', 'ppno', 'district', 'county',\n", + " 'project_description', 'bus_count', 'master_agreement_number',\n", + " 'total_project_cost', 'tircp_award_amount_($)', 'prop_type'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "electric buses 13\n", + "zero-emission buses 7\n", + "zero‐emission 5\n", + "zero emission 3\n", + "zero-emission 2\n", + "electric commuter 1\n", + "CNG 1\n", + "not specified 1\n", + "fuel-cell 1\n", + "battery electric 1\n", + "battery-electric 1\n", + "fuel cell 1\n", + "Name: prop_type, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# check work\n", + "display(\n", + " bus_only.columns,\n", + " bus_only[\"prop_type\"].value_counts(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "251b72b3-7ab4-4028-8cad-1f39b4e334c0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0.1Unnamed: 0award_yearproject_#grant_recipientproject_titleppnodistrictcountyproject_descriptionbus_countmaster_agreement_numbertotal_project_costtircp_award_amount_($)prop_type
515151201824Shasta Regional Transportation Agency (SRTA)North State Intercity Bus SystemCP0452VARPurchase 7 new coach-style buses to support a ...14.064SRTAMA95160008641000not specified
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0.1 Unnamed: 0 award_year project_# \\\n", + "51 51 51 2018 24 \n", + "\n", + " grant_recipient \\\n", + "51 Shasta Regional Transportation Agency (SRTA) \n", + "\n", + " project_title ppno district county \\\n", + "51 North State Intercity Bus System CP045 2 VAR \n", + "\n", + " project_description bus_count \\\n", + "51 Purchase 7 new coach-style buses to support a ... 14.0 \n", + "\n", + " master_agreement_number total_project_cost tircp_award_amount_($) \\\n", + "51 64SRTAMA 9516000 8641000 \n", + "\n", + " prop_type \n", + "51 not specified " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# exploring the not specified rows\n", + "bus_only[bus_only[\"prop_type\"] == \"not specified\"]\n", + "# coach-style buses, this row does not specify if buses are zero or non-zero emission bus. GOOD TO GO" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "bd6eda27-aa67-4a3c-b863-531fbb667da3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0.1Unnamed: 0award_yearproject_#grant_recipientproject_titleppnodistrictcountyproject_descriptionbus_countmaster_agreement_numbertotal_project_costtircp_award_amount_($)prop_type
55520156Orange County Transportation Authority (OCTA)Bravo! Route 560 Rapid BusesCP00412ORAPurchase five 40-foot CNG buses for BRT Route ...40.064OCTAMA29000002320000CNG
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0.1 Unnamed: 0 award_year project_# \\\n", + "5 5 5 2015 6 \n", + "\n", + " grant_recipient \\\n", + "5 Orange County Transportation Authority (OCTA) \n", + "\n", + " project_title ppno district county \\\n", + "5 Bravo! Route 560 Rapid Buses CP004 12 ORA \n", + "\n", + " project_description bus_count \\\n", + "5 Purchase five 40-foot CNG buses for BRT Route ... 40.0 \n", + "\n", + " master_agreement_number total_project_cost tircp_award_amount_($) \\\n", + "5 64OCTAMA 2900000 2320000 \n", + "\n", + " prop_type \n", + "5 CNG " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# what is in CNG rows?\n", + "bus_only[bus_only[\"prop_type\"] == \"CNG\"]\n", + "# was 4 rows, then adjusted prop list to have cng at the bottom. now showing 1 row thats actually CNG" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "8f056155-1a0f-4d7f-85af-ee6d96069eab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['CNG',\n", + " 'battery electric',\n", + " 'battery-electric',\n", + " 'electric buses',\n", + " 'electric commuter',\n", + " 'fuel cell',\n", + " 'fuel-cell',\n", + " 'not specified',\n", + " 'zero emission',\n", + " 'zero-emission',\n", + " 'zero-emission buses',\n", + " 'zero‐emission']" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# consolidate values\n", + "list(bus_only[\"prop_type\"].sort_values(ascending=True).unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "8964ac4d-d7c8-457c-98b0-577d6e5e30ef", + "metadata": {}, + "outputs": [], + "source": [ + "prop_dict = {\n", + " \"battery electric\": \"BEB\",\n", + " \"battery-electric\": \"BEB\",\n", + " \"electric buses\": \"electric (not specified)\",\n", + " \"electric commuter\": \"electric (not specified)\",\n", + " \"fuel cell\": \"FCEB\",\n", + " \"fuel-cell\": \"FCEB\",\n", + " 
\"zero-emission buses\": \"zero-emission bus (not specified)\",\n", + " \"zero emission\": \"zero-emission bus (not specified)\",\n", + " \"zero-emission\": \"zero-emission bus (not specified)\",\n", + " \"zero‐emission\": \"zero-emission bus (not specified)\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "1204ed35-8e16-4a14-9c41-8c22b24a1503", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_469/3978264113.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " bus_only.replace({'prop_type': prop_dict}, inplace=True)\n" + ] + } + ], + "source": [ + "# replacing prop_type values with dictionary\n", + "bus_only.replace({\"prop_type\": prop_dict}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "d2da1c1b-c91b-42e5-b3ff-583f3fd60676", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "zero-emission bus (not specified) 17\n", + "electric (not specified) 14\n", + "FCEB 2\n", + "BEB 2\n", + "CNG 1\n", + "not specified 1\n", + "Name: prop_type, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0.1Unnamed: 0award_yearproject_#grant_recipientproject_titleppnodistrictcountyproject_descriptionbus_countmaster_agreement_numbertotal_project_costtircp_award_amount_($)prop_type
00020151Antelope Valley Transit Authority (AVTA)Regional Transit Interconnectivity & Environme...CP0057LAPurchase 13 60-foot articulated BRT buses and ...29.064AVTA2015MA3947800024403000electric (not specified)
55520156Orange County Transportation Authority (OCTA)Bravo! Route 560 Rapid BusesCP00412ORAPurchase five 40-foot CNG buses for BRT Route ...40.064OCTAMA29000002320000CNG
111111201512San Joaquin Regional Transit District (SJRTD)BRT Expansion: MLK Corridor and Crosstown Mine...CP01110SJBus rapid transit infrastructure along the MLK...12.064SJRRCMA A1191187766841000zero-emission bus (not specified)
16161620163Foothill TransitTransforming California: Bus Electrification, ...CP0767LAPurchase 20 zero-emission buses to extend Rout...20.064FOOTHILLMA165800005000000zero-emission bus (not specified)
29292920182Anaheim Transportation Network (ATN)#Electrify Anaheim: Changing the Transit Parad...CP02712ORADeploys 40 zero-emission electric buses to dou...40.064ATNMA A14520100028617000electric (not specified)
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0.1 Unnamed: 0 award_year project_# \\\n", + "0 0 0 2015 1 \n", + "5 5 5 2015 6 \n", + "11 11 11 2015 12 \n", + "16 16 16 2016 3 \n", + "29 29 29 2018 2 \n", + "\n", + " grant_recipient \\\n", + "0 Antelope Valley Transit Authority (AVTA) \n", + "5 Orange County Transportation Authority (OCTA) \n", + "11 San Joaquin Regional Transit District (SJRTD) \n", + "16 Foothill Transit \n", + "29 Anaheim Transportation Network (ATN) \n", + "\n", + " project_title ppno district county \\\n", + "0 Regional Transit Interconnectivity & Environme... CP005 7 LA \n", + "5 Bravo! Route 560 Rapid Buses CP004 12 ORA \n", + "11 BRT Expansion: MLK Corridor and Crosstown Mine... CP011 10 SJ \n", + "16 Transforming California: Bus Electrification, ... CP076 7 LA \n", + "29 #Electrify Anaheim: Changing the Transit Parad... CP027 12 ORA \n", + "\n", + " project_description bus_count \\\n", + "0 Purchase 13 60-foot articulated BRT buses and ... 29.0 \n", + "5 Purchase five 40-foot CNG buses for BRT Route ... 40.0 \n", + "11 Bus rapid transit infrastructure along the MLK... 12.0 \n", + "16 Purchase 20 zero-emission buses to extend Rout... 20.0 \n", + "29 Deploys 40 zero-emission electric buses to dou... 
40.0 \n", + "\n", + " master_agreement_number total_project_cost tircp_award_amount_($) \\\n", + "0 64AVTA2015MA 39478000 24403000 \n", + "5 64OCTAMA 2900000 2320000 \n", + "11 64SJRRCMA A1 19118776 6841000 \n", + "16 64FOOTHILLMA 16580000 5000000 \n", + "29 64ATNMA A1 45201000 28617000 \n", + "\n", + " prop_type \n", + "0 electric (not specified) \n", + "5 CNG \n", + "11 zero-emission bus (not specified) \n", + "16 zero-emission bus (not specified) \n", + "29 electric (not specified) " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# check work\n", + "display(bus_only.prop_type.value_counts(), bus_only.head())\n", + "\n", + "# looks good" + ] + }, + { + "cell_type": "markdown", + "id": "392dd768-88e4-42bb-a26f-ab95e225b271", + "metadata": { + "tags": [] + }, + "source": [ + "## New column for bus size type - `bus_size_type`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "278079a5-9ebb-4ac2-9363-ecfc0b6a818f", + "metadata": {}, + "outputs": [], + "source": [ + "bus_size = [\n", + " \"standard\",\n", + " \"30-foot\",\n", + " \"40 foot\",\n", + " \"40-foot\",\n", + " \"45-foot\",\n", + " \"45 foot\",\n", + " \"40ft\",\n", + " \"60-foot\",\n", + " \"articulated\",\n", + " \"cutaway\",\n", + " \"coach-style\",\n", + " \"over-the-road\",\n", + " \"feeder bus\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "9988791c-8ec6-45c4-b7b6-4fa8ccbfe823", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(bus_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "71b25adf-440a-4f52-9d18-f908f57d5aab", + "metadata": {}, + "outputs": [], + "source": [ + "# re writing prop type funct for bus size\n", + "def bus_size_finder(description):\n", + " for keyword in bus_size:\n", + " if keyword in description:\n", + " return 
keyword\n", + " return \"not specified\"" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "45ab8fdc-e752-4584-8bb8-af4426861a71", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_469/11797775.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " bus_only[\"bus_size_type\"] = bus_only[\"project_description\"].apply(bus_size_finder)\n" + ] + } + ], + "source": [ + "# creating new column, filling the column using the function applied to project_desctiotion\n", + "bus_only[\"bus_size_type\"] = bus_only[\"project_description\"].apply(bus_size_finder)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "c0ce4822-7356-4e52-94cb-04269eb82db9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Unnamed: 0.1', 'Unnamed: 0', 'award_year', 'project_#',\n", + " 'grant_recipient', 'project_title', 'ppno', 'district', 'county',\n", + " 'project_description', 'bus_count', 'master_agreement_number',\n", + " 'total_project_cost', 'tircp_award_amount_($)', 'prop_type',\n", + " 'bus_size_type'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "not specified 30\n", + "over-the-road 2\n", + "45-foot 1\n", + "40-foot 1\n", + "40 foot 1\n", + "coach-style 1\n", + "feeder bus 1\n", + "Name: bus_size_type, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# checking work\n", + "display(bus_only.columns, bus_only.bus_size_type.value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "a17a1dfa-d70a-4076-b9c1-618a43398262", + "metadata": {}, 
+ "outputs": [ + { + "data": { + "text/plain": [ + "['40 foot',\n", + " '40-foot',\n", + " '45-foot',\n", + " 'coach-style',\n", + " 'feeder bus',\n", + " 'not specified',\n", + " 'over-the-road']" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(bus_only['bus_size_type'].sort_values().unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "ad0bda67-e3b2-4bdf-9bfb-0afa7b19bc0d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0.1Unnamed: 0award_yearproject_#grant_recipientproject_titleppnodistrictcountyproject_descriptionbus_countmaster_agreement_numbertotal_project_costtircp_award_amount_($)prop_typebus_size_type
99999920234City of PasadenaPasadena Transit System Legacy Project: #MoreS...CP1107LAPurchases 40 zero-emission buses and associate...40.0NaN14426300014424000zero-emission bus (not specified)not specified
34343420187City of Los Angeles (LA DOT)Los Angeles City: Leading the Transformation t...CP0297LAAcquire 112 zero-emission buses to replace exi...112.064LADOTMA10279000036104000zero-emission bus (not specified)not specified
33333320186City of FresnoSouthwest Fresno Community ConnectorCP0796FREPurchase of 6 zero-emission battery-electric b...6.064FRESNOMA-01116570007798000electric (not specified)not specified
16161620163Foothill TransitTransforming California: Bus Electrification, ...CP0767LAPurchase 20 zero-emission buses to extend Rout...20.064FOOTHILLMA165800005000000zero-emission bus (not specified)not specified
81818120229City of WascoCity of Wasco Improving Air Quality and Econom...CP0906KERPurchase of 3 zero-emission buses that will su...3.0Pending15430001000000zero-emission bus (not specified)not specified
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0.1 Unnamed: 0 award_year project_# \\\n", + "99 99 99 2023 4 \n", + "34 34 34 2018 7 \n", + "33 33 33 2018 6 \n", + "16 16 16 2016 3 \n", + "81 81 81 2022 9 \n", + "\n", + " grant_recipient \\\n", + "99 City of Pasadena \n", + "34 City of Los Angeles (LA DOT) \n", + "33 City of Fresno \n", + "16 Foothill Transit \n", + "81 City of Wasco \n", + "\n", + " project_title ppno district county \\\n", + "99 Pasadena Transit System Legacy Project: #MoreS... CP110 7 LA \n", + "34 Los Angeles City: Leading the Transformation t... CP029 7 LA \n", + "33 Southwest Fresno Community Connector CP079 6 FRE \n", + "16 Transforming California: Bus Electrification, ... CP076 7 LA \n", + "81 City of Wasco Improving Air Quality and Econom... CP090 6 KER \n", + "\n", + " project_description bus_count \\\n", + "99 Purchases 40 zero-emission buses and associate... 40.0 \n", + "34 Acquire 112 zero-emission buses to replace exi... 112.0 \n", + "33 Purchase of 6 zero-emission battery-electric b... 6.0 \n", + "16 Purchase 20 zero-emission buses to extend Rout... 20.0 \n", + "81 Purchase of 3 zero-emission buses that will su... 
3.0 \n", + "\n", + " master_agreement_number total_project_cost tircp_award_amount_($) \\\n", + "99 NaN 144263000 14424000 \n", + "34 64LADOTMA 102790000 36104000 \n", + "33 64FRESNOMA-01 11657000 7798000 \n", + "16 64FOOTHILLMA 16580000 5000000 \n", + "81 Pending 1543000 1000000 \n", + "\n", + " prop_type bus_size_type \n", + "99 zero-emission bus (not specified) not specified \n", + "34 zero-emission bus (not specified) not specified \n", + "33 electric (not specified) not specified \n", + "16 zero-emission bus (not specified) not specified \n", + "81 zero-emission bus (not specified) not specified " + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# expected that not a lot of rows specify a size type.\n", + "# will still take a random peek into some\n", + "\n", + "bus_only[bus_only[\"bus_size_type\"] == \"not specified\"].sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "ab06f71f-3f64-4b12-8164-fe3524c97d64", + "metadata": {}, + "outputs": [], + "source": [ + "# consolidate\n", + "size_dict={'40 foot': 'conventional (40-ft like)' ,\n", + " '40-foot': 'conventional (40-ft like)',\n", + " '45-foot': 'conventional (40-ft like)',\n", + " 'coach-style':'over-the-road',\n", + " 'feeder bus': 'conventional (40-ft like)',\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "be465cfd-fb25-4862-a709-c3dd675a8fbf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(size_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "d66297eb-2ced-4148-9722-167797349280", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_469/3147187588.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a 
DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " bus_only.replace({\"bus_size_type\": size_dict}, inplace=True)\n" + ] + } + ], + "source": [ + "# .replace() with size_dict to replace values in bus size col\n", + "bus_only.replace({\"bus_size_type\": size_dict}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "055fbee5-b73b-4779-801b-1c0d23e081af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "not specified 30\n", + "conventional (40-ft like) 4\n", + "over-the-road 3\n", + "Name: bus_size_type, dtype: int64" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check work\n", + "bus_only.bus_size_type.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "1b95f567-13fc-4184-8b09-f512f702f3f0", + "metadata": { + "tags": [] + }, + "source": [ + "## export project- bus only df" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "cecea030-b37a-4170-9997-656e8bd0c080", + "metadata": {}, + "outputs": [], + "source": [ + "bus_only.to_csv(\n", + " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_bus_only.csv\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1ea926da-8d76-481c-a5fb-ef39606f45ca", + "metadata": { + "tags": [] + }, + "source": [ + "## Read in project bus only data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "abaf10b2-0dc3-432d-845a-8dacb2af806f", + "metadata": {}, + "outputs": [], + "source": [ + "bus_checker = pd.read_csv(\n", + " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_bus_only.csv\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "ab4b7cf3-183e-4ea1-baca-e3f5d0bd9dd6", + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "(37, 17)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Index(['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'award_year', 'project_#',\n", + " 'grant_recipient', 'project_title', 'ppno', 'district', 'county',\n", + " 'project_description', 'bus_count', 'master_agreement_number',\n", + " 'total_project_cost', 'tircp_award_amount_($)', 'prop_type',\n", + " 'bus_size_type'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
Unnamed: 0.2Unnamed: 0.1Unnamed: 0award_yearproject_#grant_recipientproject_titleppnodistrictcountyproject_descriptionbus_countmaster_agreement_numbertotal_project_costtircp_award_amount_($)cost_per_busprop_typebus_size_type
22Santa Monica Big Blue Bus7.01105000157857
7City of Santa Monica113.026027000230327
12Foothill Transit20.05000000250000
5City of Los Angeles (LA DOT)112.036104000322357
10City of Wasco3.01000000333333000020151Antelope Valley Transit Authority (AVTA)Regional Transit Interconnectivity & Environme...CP0057LAPurchase 13 60-foot articulated BRT buses and ...29.064AVTA2015MA3947800024403000electric (not specified)conventional (40-ft like)
6City of Pasadena155520156Orange County Transportation Authority (OCTA)Bravo! Route 560 Rapid BusesCP00412ORAPurchase five 40-foot CNG buses for BRT Route ...40.01442400036060064OCTAMA29000002320000CNGconventional (40-ft like)
192111111201512San Joaquin Regional Transit District (SJRTD)BRT Expansion: MLK Corridor and Crosstown Mine...CP01110SJBus rapid transit infrastructure along the MLK...12.064SJRRCMA A1191187766841000570083
18Orange County Transportation Authority (OCTA)73.041727000571602
23Shasta Regional Transportation Agency (SRTA)14.08641000617214
11Culver City5.03247000649400
17Los Angeles County Metropolitan Transportation...261.0177500000680076
0Anaheim Transportation Network (ATN)65.051395000790692
25Sonoma County Transportation Authority (SCTA)30.024825000827500
24Solano Transportation Authority (STA)13.010788000829846
26Torrance Transit Department7.06000000857142
29Yuba-Sutter Transit16.013725000857812
1Antelope Valley Transit Authority (AVTA)36.035735000992638
27Transit Joint Powers Authority of Merced County3.031120001037333
8City of Simi Valley6.070530001175500
4City of Glendale and Arroyo Verdugo Communities27.0346480001283259
16Long Beach Transit (LBT)5.064510001290200zero-emission bus (not specified)not specified
3City of Fresno6.077980001299666
21Santa Cruz Metropolitan Transit District (Metro)24.0385890001607875
20Santa Barbara Metropolitan Transit District (S...8.0144800001810000
2Antelope Valley Transit Authority (AVTA) & Lon...7.0131560001879428
14Humboldt Transit Authority (HTA) with Yurok Tr...4.086120002153000
28Tulare County Regional Transit Agency (TCRTA)14.0337690002412071
15Lake Transit Authority (LTA)4.0129940003248500
13Humboldt Transit Authority (HTA)11.038743000352209016161620163Foothill TransitTransforming California: Bus Electrification, ...CP0767LAPurchase 20 zero-emission buses to extend Rout...20.064FOOTHILLMA165800005000000zero-emission bus (not specified)not specified
9City of Torrance10.0960000009600000429292920182Anaheim Transportation Network (ATN)#Electrify Anaheim: Changing the Transit Parad...CP02712ORADeploys 40 zero-emission electric buses to dou...40.064ATNMA A14520100028617000electric (not specified)not specified
\n", "
" ], "text/plain": [ - " grant_recipient bus_count \\\n", - "22 Santa Monica Big Blue Bus 7.0 \n", - "7 City of Santa Monica 113.0 \n", - "12 Foothill Transit 20.0 \n", - "5 City of Los Angeles (LA DOT) 112.0 \n", - "10 City of Wasco 3.0 \n", - "6 City of Pasadena 40.0 \n", - "19 San Joaquin Regional Transit District (SJRTD) 12.0 \n", - "18 Orange County Transportation Authority (OCTA) 73.0 \n", - "23 Shasta Regional Transportation Agency (SRTA) 14.0 \n", - "11 Culver City 5.0 \n", - "17 Los Angeles County Metropolitan Transportation... 261.0 \n", - "0 Anaheim Transportation Network (ATN) 65.0 \n", - "25 Sonoma County Transportation Authority (SCTA) 30.0 \n", - "24 Solano Transportation Authority (STA) 13.0 \n", - "26 Torrance Transit Department 7.0 \n", - "29 Yuba-Sutter Transit 16.0 \n", - "1 Antelope Valley Transit Authority (AVTA) 36.0 \n", - "27 Transit Joint Powers Authority of Merced County 3.0 \n", - "8 City of Simi Valley 6.0 \n", - "4 City of Glendale and Arroyo Verdugo Communities 27.0 \n", - "16 Long Beach Transit (LBT) 5.0 \n", - "3 City of Fresno 6.0 \n", - "21 Santa Cruz Metropolitan Transit District (Metro) 24.0 \n", - "20 Santa Barbara Metropolitan Transit District (S... 8.0 \n", - "2 Antelope Valley Transit Authority (AVTA) & Lon... 7.0 \n", - "14 Humboldt Transit Authority (HTA) with Yurok Tr... 
4.0 \n", - "28 Tulare County Regional Transit Agency (TCRTA) 14.0 \n", - "15 Lake Transit Authority (LTA) 4.0 \n", - "13 Humboldt Transit Authority (HTA) 11.0 \n", - "9 City of Torrance 10.0 \n", + " Unnamed: 0.2 Unnamed: 0.1 Unnamed: 0 award_year project_# \\\n", + "0 0 0 0 2015 1 \n", + "1 5 5 5 2015 6 \n", + "2 11 11 11 2015 12 \n", + "3 16 16 16 2016 3 \n", + "4 29 29 29 2018 2 \n", + "\n", + " grant_recipient \\\n", + "0 Antelope Valley Transit Authority (AVTA) \n", + "1 Orange County Transportation Authority (OCTA) \n", + "2 San Joaquin Regional Transit District (SJRTD) \n", + "3 Foothill Transit \n", + "4 Anaheim Transportation Network (ATN) \n", + "\n", + " project_title ppno district county \\\n", + "0 Regional Transit Interconnectivity & Environme... CP005 7 LA \n", + "1 Bravo! Route 560 Rapid Buses CP004 12 ORA \n", + "2 BRT Expansion: MLK Corridor and Crosstown Mine... CP011 10 SJ \n", + "3 Transforming California: Bus Electrification, ... CP076 7 LA \n", + "4 #Electrify Anaheim: Changing the Transit Parad... 
CP027 12 ORA \n", "\n", - " tircp_award_amount_($) cost_per_bus \n", - "22 1105000 157857 \n", - "7 26027000 230327 \n", - "12 5000000 250000 \n", - "5 36104000 322357 \n", - "10 1000000 333333 \n", - "6 14424000 360600 \n", - "19 6841000 570083 \n", - "18 41727000 571602 \n", - "23 8641000 617214 \n", - "11 3247000 649400 \n", - "17 177500000 680076 \n", - "0 51395000 790692 \n", - "25 24825000 827500 \n", - "24 10788000 829846 \n", - "26 6000000 857142 \n", - "29 13725000 857812 \n", - "1 35735000 992638 \n", - "27 3112000 1037333 \n", - "8 7053000 1175500 \n", - "4 34648000 1283259 \n", - "16 6451000 1290200 \n", - "3 7798000 1299666 \n", - "21 38589000 1607875 \n", - "20 14480000 1810000 \n", - "2 13156000 1879428 \n", - "14 8612000 2153000 \n", - "28 33769000 2412071 \n", - "15 12994000 3248500 \n", - "13 38743000 3522090 \n", - "9 96000000 9600000 " + " project_description bus_count \\\n", + "0 Purchase 13 60-foot articulated BRT buses and ... 29.0 \n", + "1 Purchase five 40-foot CNG buses for BRT Route ... 40.0 \n", + "2 Bus rapid transit infrastructure along the MLK... 12.0 \n", + "3 Purchase 20 zero-emission buses to extend Rout... 20.0 \n", + "4 Deploys 40 zero-emission electric buses to dou... 
40.0 \n", + "\n", + " master_agreement_number total_project_cost tircp_award_amount_($) \\\n", + "0 64AVTA2015MA 39478000 24403000 \n", + "1 64OCTAMA 2900000 2320000 \n", + "2 64SJRRCMA A1 19118776 6841000 \n", + "3 64FOOTHILLMA 16580000 5000000 \n", + "4 64ATNMA A1 45201000 28617000 \n", + "\n", + " prop_type bus_size_type \n", + "0 electric (not specified) conventional (40-ft like) \n", + "1 CNG conventional (40-ft like) \n", + "2 zero-emission bus (not specified) not specified \n", + "3 zero-emission bus (not specified) not specified \n", + "4 electric (not specified) not specified " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(bus_checker.shape, bus_checker.columns, bus_checker.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "aab862f1-7aa2-47ea-a5bc-52483b097c1e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['conventional (40-ft like)', 'not specified', 'over-the-road'],\n", + " dtype=object)" ] }, - "execution_count": 7, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#confirm new column was created and values were populated\n", - "bus_cost.sort_values('cost_per_bus')" + "# inspect columns values.\n", + "bus_only[\"bus_size_type\"].sort_values().unique()\n", + "# everything looks good" + ] + }, + { + "cell_type": "markdown", + "id": "c0aa374e-985b-46b3-a7ab-f3bc66e36204", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## DEPRECATED - Data Analysis\n", + "see `cost_per_bus_analysis` notebook" + ] + }, + { + "cell_type": "markdown", + "id": "02cce57d-f82c-4f85-be53-814538b6b6c3", + "metadata": { + "tags": [] + }, + "source": [ + "### Consolidate up grant recipient name" + ] + }, + { + "cell_type": "markdown", + "id": "a5d0e920-cfd9-465f-89eb-81214b27070a", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "### aggregate up" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "a0a396f0-c9ad-48bd-9767-45b5a8b53d25", + "metadata": {}, + "outputs": [], + "source": [ + "# aggregate # of buses and allocation by transit agency\n", + "# bus_cost = only_bus.groupby('grant_recipient').agg({\n", + "# '#_of_buses':\"sum\",\n", + "# 'allocation_amount':'sum'\n", + "# }).reset_index()\n", + "\n", + "bus_cost = (\n", + " bus_only.groupby(\"grant_recipient\")\n", + " .agg({\"bus_count\": \"sum\", \"tircp_award_amount_($)\": \"sum\"})\n", + " .reset_index()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5c8dfa3-310f-47bd-8d95-2eed37feec73", + "metadata": {}, + "outputs": [], + "source": [ + "# confirm aggregation worked\n", + "bus_cost" + ] + }, + { + "cell_type": "markdown", + "id": "0f074183-9110-41fc-820b-4483fe9b076b", + "metadata": { + "tags": [] + }, + "source": [ + "### create new cost per bus column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "391fdd1a-585b-43e4-b70b-18c7f54a8263", + "metadata": {}, + "outputs": [], + "source": [ + "bus_cost[\"cost_per_bus\"] = (\n", + " bus_cost[\"tircp_award_amount_($)\"] / bus_cost[\"bus_count\"]\n", + ").astype(\"int64\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3219820e-0d80-4c1b-92c5-f098f09a22a9", + "metadata": {}, + "outputs": [], + "source": [ + "# confirm new column was created and values were populated\n", + "bus_cost.sort_values(\"cost_per_bus\")" ] }, { @@ -1103,17 +2678,19 @@ "id": "7c2df629-863e-476f-8f1c-934535a1feb0", "metadata": {}, "source": [ - "## Export cost per bus via project tracking sheet to gcs" + "### Export cost per bus via project tracking sheet to gcs" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "b7ad9fe7-a705-4138-8f3d-138f6d0146f6", "metadata": {}, "outputs": [], "source": [ - 
"bus_cost.to_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_cost_per_bus.csv')" + "bus_cost.to_csv(\n", + " \"gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_cost_per_bus.csv\"\n", + ")" ] }, {