Merge pull request #1387 from cityofaustin/1.42.0-release-candidate
Release VZ 1.42.0 - Utopia Ct
johnclary authored Feb 29, 2024
2 parents cf8cd03 + 4db4406 commit d374c20
Showing 36 changed files with 1,544 additions and 642 deletions.
98 changes: 98 additions & 0 deletions atd-toolbox/cris_export_parser/parse_data.py
@@ -0,0 +1,98 @@
#!/usr/bin/env python3
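"""
Scan a CRIS primaryperson CSV export for driver address fields containing keywords
such as "homeless", "unhoused", or "transient", and set the peh_fl flag on the
matching atd_txdot_primaryperson records in the local Vision Zero database.

Usage: python3 parse_data.py -i <path_to_csv>
"""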

import csv
import argparse
import psycopg2
import psycopg2.extras
import time

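# Connection to a local copy of the Vision Zero database (local dev credentials)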
conn = psycopg2.connect(
    dbname="atd_vz_data",
    user="visionzero",
    password="visionzero",
    host="localhost",
    port="5432",
)


def parse_csv(file_path, field_names, substrings):
    with open(file_path, "r") as file:
        reader = csv.DictReader(file)
        for row in reader:
            for field in field_names:
                # Check if field is in row and not None
                if field in row and row[field] is not None:
                    fieldValue = row[field]  # Store the field value
                    for substring in substrings:
                        if substring.lower() in fieldValue.lower():
                            if not (
                                "Crash_ID" in row
                                and "Unit_Nbr" in row
                                and "Prsn_Nbr" in row
                            ):
                                continue

                            with conn.cursor(
                                cursor_factory=psycopg2.extras.RealDictCursor
                            ) as cur:
                                # Confirm exactly one matching primaryperson record exists before updating

                                count = f"""
                                    select count(*) as count
                                    from atd_txdot_primaryperson
                                    where true
                                    and crash_id = {row.get("Crash_ID")}
                                    and unit_nbr = {row.get("Unit_Nbr")}
                                    and prsn_nbr = {row.get("Prsn_Nbr")};"""

                                cur.execute(count)
                                count_check = cur.fetchone()
                                print("row count: ", count_check["count"])
                                if count_check["count"] != 1:
                                    print(
                                        {
                                            "Crash_ID": row.get("Crash_ID"),
                                            "Prsn_Nbr": row.get("Prsn_Nbr"),
                                            "Prsn_Name": f"{row.get('Prsn_First_Name')} {row.get('Prsn_Last_Name')}",
                                            "Drvr_City_Name": row.get("Drvr_City_Name"),
                                            "Drvr_Street_Name": row.get(
                                                "Drvr_Street_Name"
                                            ),
                                        }
                                    )
                                    continue
                                print("updating record...")

                                update = f"""
                                    update atd_txdot_primaryperson
                                    set peh_fl = true
                                    where true
                                    and crash_id = {row.get("Crash_ID")}
                                    and unit_nbr = {row.get("Unit_Nbr")}
                                    and prsn_nbr = {row.get("Prsn_Nbr")};"""

                                print(update)
                                cur.execute(update)
                                conn.commit()


def main():
    parser = argparse.ArgumentParser(description="Parse a CSV file.")
    parser.add_argument("-i", "--input", help="Input file path", required=True)
    args = parser.parse_args()

    fields = [
        "Drvr_Street_Nbr",
        "Drvr_Street_Pfx",
        "Drvr_Street_Name",  # This is the field where the keywords appear in primaryperson records
        "Drvr_Street_Sfx",
        "Drvr_Apt_Nbr",
        "Drvr_City_Name",
        "Drvr_State_ID",
        "Drvr_Zip",
    ]
    parse_csv(args.input, fields, ["homeless", "unhoused", "transient"])


if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions atd-toolbox/get_lookup_table_changes/.gitignore
@@ -0,0 +1,3 @@
env
extract
*.sql
23 changes: 23 additions & 0 deletions atd-toolbox/get_lookup_table_changes/README.md
@@ -0,0 +1,23 @@
This folder and the script it contains are for getting our lookup tables up to date with the latest lookup export from CRIS. We will probably need to run this after every major CRIS release.

### Install packages

Create or activate your Python virtual environment using [venv](https://docs.python.org/3/library/venv.html).

Install packages from the `requirements.txt` file:

`pip install -r requirements.txt`

### Env file

Rename `env_template` to `env`.

Fill in the values using your credentials for the VZ read replica.

### Running the script

To run this script, you need a recent CRIS lookup table export CSV. Provide its file path as a command-line argument like so:

`python3 get_lookup_table_changes.py --input path_to_extract.csv`

Running `get_lookup_table_changes.py` can take a while, so sit tight. Once the script is done, a file called `up_migration.sql` will be created in this directory containing all of the SQL commands generated by the script. The contents of this file can then be used to create a migration in the Hasura console so we can track these large changes. A file called `down_migration.sql` is also created in this directory, which you can use as the down migration.
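
For reference, here is a hypothetical snippet of the kinds of statements the generated `up_migration.sql` can contain (the table names and values below are illustrative, not from a real run):

```sql
-- Adding table atd_txdot__example_lkp
create table public.atd_txdot__example_lkp (
    id serial primary key,
    example_id integer not null,
    example_desc varchar(255) not null
);
insert into public.atd_txdot__example_lkp (example_id, example_desc) values (1, 'FIRST VALUE');

-- Changes to table atd_txdot__other_example_lkp
update public.atd_txdot__other_example_lkp set other_example_desc = 'UPDATED DESCRIPTION' where other_example_id = 5;
insert into public.atd_txdot__other_example_lkp (other_example_id, other_example_desc) values (99, 'NEW VALUE');
```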
5 changes: 5 additions & 0 deletions atd-toolbox/get_lookup_table_changes/env_template
@@ -0,0 +1,5 @@
DB_HOST=
DB_USER=
DB_PASS=
DB_NAME=
DB_SSL_REQUIREMENT=
211 changes: 211 additions & 0 deletions atd-toolbox/get_lookup_table_changes/get_lookup_table_changes.py
@@ -0,0 +1,211 @@
#!/usr/bin/env python
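"""
Compare the lookup values in a CRIS lookup table export CSV against the
atd_txdot__*_lkp tables in the Vision Zero database and generate
up_migration.sql / down_migration.sql files containing the SQL needed to
bring the database lookup tables in line with the CSV.

Usage: python get_lookup_table_changes.py --input <path_to_extract.csv>
"""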

import csv
import json
import time
import re
import os
import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
import argparse

load_dotenv("env")

DB_HOST = os.getenv("DB_HOST")
DB_USER = os.getenv("DB_USER")
DB_PASS = os.getenv("DB_PASS")
DB_NAME = os.getenv("DB_NAME")
DB_SSL_REQUIREMENT = os.getenv("DB_SSL_REQUIREMENT")


def get_pg_connection():
    """
    Returns a connection to the Postgres database
    """
    return psycopg2.connect(
        host=DB_HOST,
        user=DB_USER,
        password=DB_PASS,
        dbname=DB_NAME,
        sslmode=DB_SSL_REQUIREMENT,
        sslrootcert="/root/rds-combined-ca-bundle.pem",
    )


def table_exists(conn, table_name):
    """
    Checks if a table exists in a PostgreSQL database.
    Args:
        conn (psycopg2.extensions.connection): A connection to the PostgreSQL database.
        table_name (str): The name of the table to check for existence.
    Returns:
        bool: True if the table exists, False otherwise.
    """
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT EXISTS (
                    SELECT FROM information_schema.tables
                    WHERE table_schema = 'public'
                    AND table_name = %s
                );
                """,
                (table_name,),
            )

            result = cur.fetchone()
            return result[0]

    except Exception as e:
        print(f"Error checking table existence: {e}")
        return False


def read_and_group_csv(file_path):
    """
    Returns a dict where each key is the lookup table name and the value
    is a list of all the lookup ids/descs for that lookup table
    """
    grouped_data = {}

    with open(file_path, newline="") as csvfile:
        csvreader = csv.reader(csvfile, delimiter=",", quotechar='"')

        # Skip the first row (header)
        next(csvreader)

        for row in csvreader:
            key = row[0]
            inner_dict = {"id": int(row[1]), "description": row[2]}

            if key not in grouped_data:
                grouped_data[key] = []

            grouped_data[key].append(inner_dict)

    return grouped_data


def escape_single_quotes(input_string):
    return input_string.replace("'", "''")


def new_table(name):
    return f"""
    create table public.atd_txdot__{name}_lkp (
        id serial primary key,
        {name}_id integer not null,
        {name}_desc varchar(255) not null
    );
    """


def main(file_path):
    data = read_and_group_csv(file_path)

    # Pretty-print the grouped data as JSON
    # print(json.dumps(data, indent=4))

    pg = get_pg_connection()
    cursor = pg.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    changes = []
    down_changes = []

    for table in data:
        # Here are tables which are special cases:
        # The states (as in United States) lookup is non-uniform and does not need inspection.
        # The counties are equally fixed.
        if table in ["STATE_ID", "CNTY_ID"]:
            continue

        match = re.search(r"(^.*)_ID$", table)
        name_component = match.group(1).lower()

        print()
        print("👀 Looking into table: ", name_component)

        table_name = "atd_txdot__" + name_component + "_lkp"
        exists = table_exists(pg, table_name)
        if not exists:
            print("💥 Missing table: ", table_name)
            changes.append(f"\n-- Adding table {table_name}")
            changes.append(new_table(name_component))
            down_changes.append(f"\n-- Dropping table {table_name}")
            new_table_down = f"drop table if exists public.{table_name};"
            down_changes.append(new_table_down)
            for record in data[table]:
                # We do not have a record on file with this ID
                print(f"❓ Id {str(record['id'])} not found in {table_name}")
                print(" CSV Value: ", record["description"])
                print()
                insert = f"insert into public.{table_name} ({name_component}_id, {name_component}_desc) values ({str(record['id'])}, '{escape_single_quotes(record['description'])}');"
                changes.append(insert)
                # Don't need down changes here because the down migration just drops the table

        else:
            is_first_change = True
            for record in data[table]:
                sql = f"""
                select {name_component}_id as id, {name_component}_desc as description
                from {table_name} where {name_component}_id = {str(record['id'])};
                """
                cursor.execute(sql)
                db_result = cursor.fetchone()
                if db_result:
                    # We have a record on file with this ID
                    if db_result["description"] == record["description"]:
                        # print(f"✅ Value \"{record['description']}\" with id {str(record['id'])} found in {table_name}")
                        pass
                    else:
                        print(
                            f"❌ Id {str(record['id'])} found in {table_name} has a description mismatch:"
                        )
                        print(" CSV Value: ", record["description"])
                        print(" DB Value: ", db_result["description"])
                        print()
                        update = f"update public.{table_name} set {name_component}_desc = '{escape_single_quotes(record['description'])}' where {name_component}_id = {str(record['id'])};"
                        if is_first_change:
                            changes.append(f"\n-- Changes to table {table_name}")
                            down_changes.append(f"\n-- Changes to table {table_name}")
                        changes.append(update)
                        update_down = f"update public.{table_name} set {name_component}_desc = '{escape_single_quotes(db_result['description'])}' where {name_component}_id = {str(record['id'])};"
                        down_changes.append(update_down)
                        is_first_change = False
                else:
                    # We do not have a record on file with this ID
                    # print(f"Value \"{record['description']}\" with id {str(record['id'])} not found in {table_name}")
                    print(f"❓ Id {str(record['id'])} not found in {table_name}")
                    print(" CSV Value: ", record["description"])
                    print()
                    insert = f"insert into public.{table_name} ({name_component}_id, {name_component}_desc) values ({str(record['id'])}, '{escape_single_quotes(record['description'])}');"
                    if is_first_change:
                        changes.append(f"\n-- Changes to table {table_name}")
                        down_changes.append(f"\n-- Changes to table {table_name}")
                    changes.append(insert)
                    insert_down = f"delete from public.{table_name} where {name_component}_id = {str(record['id'])};"
                    down_changes.append(insert_down)
                    is_first_change = False

    print("\n🛠️ Here are the changes to be made:\n")
    print("\n".join(changes).strip())

    with open("up_migration.sql", "w") as outfile:
        outfile.write("\n".join(changes).strip())

    with open("down_migration.sql", "w") as outfile_down:
        outfile_down.write("\n".join(down_changes).strip())


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="extract file path")
    args = parser.parse_args()
    file_path = args.input

    main(file_path)
3 changes: 3 additions & 0 deletions atd-toolbox/get_lookup_table_changes/requirements.txt
@@ -0,0 +1,3 @@
psycopg2==2.9.3
psycopg2_binary==2.9.5
python-dotenv==1.0.1
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__autonomous_level_engaged_lkp
  schema: public
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__autonomous_unit_lkp
  schema: public
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__inv_da_lkp
  schema: public
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__rpt_road_type_lkp
  schema: public
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__trauma_centers_lkp
  schema: public