Merge pull request #1387 from cityofaustin/1.42.0-release-candidate
Release VZ 1.42.0 - Utopia Ct
johnclary authored Feb 29, 2024
2 parents cf8cd03 + 4db4406 commit d374c20
Showing 36 changed files with 1,544 additions and 642 deletions.
98 changes: 98 additions & 0 deletions atd-toolbox/cris_export_parser/parse_data.py
@@ -0,0 +1,98 @@
#!/usr/bin/env python3
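"""
Scan a CRIS primaryperson CSV export for driver address fields containing keywords
such as "homeless", "unhoused", or "transient", and set the peh_fl flag on the
matching atd_txdot_primaryperson records in the local Vision Zero database.

Usage: python3 parse_data.py -i <path_to_csv>
"""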

import csv
import argparse
import psycopg2
import psycopg2.extras
import time

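# Connection to a local copy of the Vision Zero database (local dev credentials)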
conn = psycopg2.connect(
    dbname="atd_vz_data",
    user="visionzero",
    password="visionzero",
    host="localhost",
    port="5432",
)


def parse_csv(file_path, field_names, substrings):
    with open(file_path, "r") as file:
        reader = csv.DictReader(file)
        for row in reader:
            for field in field_names:
                # Check if field is in row and not None
                if field in row and row[field] is not None:
                    fieldValue = row[field]  # Store the field value
                    for substring in substrings:
                        if substring.lower() in fieldValue.lower():
                            if not (
                                "Crash_ID" in row
                                and "Unit_Nbr" in row
                                and "Prsn_Nbr" in row
                            ):
                                continue

                            with conn.cursor(
                                cursor_factory=psycopg2.extras.RealDictCursor
                            ) as cur:
                                # Confirm exactly one matching primaryperson record exists before updating

                                count = f"""
                                    select count(*) as count
                                    from atd_txdot_primaryperson
                                    where true
                                    and crash_id = {row.get("Crash_ID")}
                                    and unit_nbr = {row.get("Unit_Nbr")}
                                    and prsn_nbr = {row.get("Prsn_Nbr")};"""

                                cur.execute(count)
                                count_check = cur.fetchone()
                                print("row count: ", count_check["count"])
                                if count_check["count"] != 1:
                                    print(
                                        {
                                            "Crash_ID": row.get("Crash_ID"),
                                            "Prsn_Nbr": row.get("Prsn_Nbr"),
                                            "Prsn_Name": f"{row.get('Prsn_First_Name')} {row.get('Prsn_Last_Name')}",
                                            "Drvr_City_Name": row.get("Drvr_City_Name"),
                                            "Drvr_Street_Name": row.get(
                                                "Drvr_Street_Name"
                                            ),
                                        }
                                    )
                                    continue
                                print("updating record...")

                                update = f"""
                                    update atd_txdot_primaryperson
                                    set peh_fl = true
                                    where true
                                    and crash_id = {row.get("Crash_ID")}
                                    and unit_nbr = {row.get("Unit_Nbr")}
                                    and prsn_nbr = {row.get("Prsn_Nbr")};"""

                                print(update)
                                cur.execute(update)
                                conn.commit()


def main():
    parser = argparse.ArgumentParser(description="Parse a CSV file.")
    parser.add_argument("-i", "--input", help="Input file path", required=True)
    args = parser.parse_args()

    fields = [
        "Drvr_Street_Nbr",
        "Drvr_Street_Pfx",
        "Drvr_Street_Name",  # This is the field where the keywords appear in primaryperson records
        "Drvr_Street_Sfx",
        "Drvr_Apt_Nbr",
        "Drvr_City_Name",
        "Drvr_State_ID",
        "Drvr_Zip",
    ]
    parse_csv(args.input, fields, ["homeless", "unhoused", "transient"])


if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions atd-toolbox/get_lookup_table_changes/.gitignore
@@ -0,0 +1,3 @@
env
extract
*.sql
23 changes: 23 additions & 0 deletions atd-toolbox/get_lookup_table_changes/README.md
@@ -0,0 +1,23 @@
This folder and the script it contains are for getting our lookup tables up to date with the latest lookup export from CRIS. We will probably need to run this after every major CRIS release.

### Install packages

Create or activate your Python virtual environment using [venv](https://docs.python.org/3/library/venv.html).

Install packages from the `requirements.txt` file:

`pip install -r requirements.txt`

### Env file

Rename `env_template` to `env`.

Fill in the values using your credentials for the VZ read replica.

### Running the script

To run this script, you need a recent CRIS lookup table export CSV. Provide its file path as a command-line argument like so:

`python3 get_lookup_table_changes.py --input path_to_extract.csv`

Running `get_lookup_table_changes.py` can take a while, so sit tight. Once the script is done, a file called `up_migration.sql` will be created in this directory containing all of the SQL commands generated by the script. The contents of this file can then be used to create a migration in the Hasura console so we can track these large changes. A file called `down_migration.sql` is also created in this directory, which you can use as the down migration.
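
For reference, here is a hypothetical snippet of the kinds of statements the generated `up_migration.sql` can contain (the table names and values below are illustrative, not from a real run):

```sql
-- Adding table atd_txdot__example_lkp
create table public.atd_txdot__example_lkp (
    id serial primary key,
    example_id integer not null,
    example_desc varchar(255) not null
);
insert into public.atd_txdot__example_lkp (example_id, example_desc) values (1, 'FIRST VALUE');

-- Changes to table atd_txdot__other_example_lkp
update public.atd_txdot__other_example_lkp set other_example_desc = 'UPDATED DESCRIPTION' where other_example_id = 5;
insert into public.atd_txdot__other_example_lkp (other_example_id, other_example_desc) values (99, 'NEW VALUE');
```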
5 changes: 5 additions & 0 deletions atd-toolbox/get_lookup_table_changes/env_template
@@ -0,0 +1,5 @@
DB_HOST=
DB_USER=
DB_PASS=
DB_NAME=
DB_SSL_REQUIREMENT=
211 changes: 211 additions & 0 deletions atd-toolbox/get_lookup_table_changes/get_lookup_table_changes.py
@@ -0,0 +1,211 @@
#!/usr/bin/env python
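"""
Compare the lookup values in a CRIS lookup table export CSV against the
atd_txdot__*_lkp tables in the Vision Zero database and generate
up_migration.sql / down_migration.sql files containing the SQL needed to
bring the database lookup tables in line with the CSV.

Usage: python get_lookup_table_changes.py --input <path_to_extract.csv>
"""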

import csv
import json
import time
import re
import os
import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
import argparse

load_dotenv("env")

DB_HOST = os.getenv("DB_HOST")
DB_USER = os.getenv("DB_USER")
DB_PASS = os.getenv("DB_PASS")
DB_NAME = os.getenv("DB_NAME")
DB_SSL_REQUIREMENT = os.getenv("DB_SSL_REQUIREMENT")


def get_pg_connection():
    """
    Returns a connection to the Postgres database
    """
    return psycopg2.connect(
        host=DB_HOST,
        user=DB_USER,
        password=DB_PASS,
        dbname=DB_NAME,
        sslmode=DB_SSL_REQUIREMENT,
        sslrootcert="/root/rds-combined-ca-bundle.pem",
    )


def table_exists(conn, table_name):
    """
    Checks if a table exists in a PostgreSQL database.
    Args:
        conn (psycopg2.extensions.connection): A connection to the PostgreSQL database.
        table_name (str): The name of the table to check for existence.
    Returns:
        bool: True if the table exists, False otherwise.
    """
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT EXISTS (
                    SELECT FROM information_schema.tables
                    WHERE table_schema = 'public'
                    AND table_name = %s
                );
                """,
                (table_name,),
            )

            result = cur.fetchone()
            return result[0]

    except Exception as e:
        print(f"Error checking table existence: {e}")
        return False


def read_and_group_csv(file_path):
    """
    Returns a dict where each key is the lookup table name and the value
    is a list of all the lookup ids/descs for that lookup table
    """
    grouped_data = {}

    with open(file_path, newline="") as csvfile:
        csvreader = csv.reader(csvfile, delimiter=",", quotechar='"')

        # Skip the first row (header)
        next(csvreader)

        for row in csvreader:
            key = row[0]
            inner_dict = {"id": int(row[1]), "description": row[2]}

            if key not in grouped_data:
                grouped_data[key] = []

            grouped_data[key].append(inner_dict)

    return grouped_data


def escape_single_quotes(input_string):
    return input_string.replace("'", "''")


def new_table(name):
    return f"""
    create table public.atd_txdot__{name}_lkp (
        id serial primary key,
        {name}_id integer not null,
        {name}_desc varchar(255) not null
    );
    """


def main(file_path):
    data = read_and_group_csv(file_path)

    # Pretty-print the grouped data as JSON
    # print(json.dumps(data, indent=4))

    pg = get_pg_connection()
    cursor = pg.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    changes = []
    down_changes = []

    for table in data:
        # Here are tables which are special cases:
        # The states (as in United States) lookup is non-uniform and does not need inspection.
        # The counties are equally fixed.
        if table in ["STATE_ID", "CNTY_ID"]:
            continue

        match = re.search(r"(^.*)_ID$", table)
        name_component = match.group(1).lower()

        print()
        print("👀 Looking into table: ", name_component)

        table_name = "atd_txdot__" + name_component + "_lkp"
        exists = table_exists(pg, table_name)
        if not exists:
            print("💥 Missing table: ", table_name)
            changes.append(f"\n-- Adding table {table_name}")
            changes.append(new_table(name_component))
            down_changes.append(f"\n-- Dropping table {table_name}")
            new_table_down = f"drop table if exists public.{table_name};"
            down_changes.append(new_table_down)
            for record in data[table]:
                # We do not have a record on file with this ID
                print(f"❓ Id {str(record['id'])} not found in {table_name}")
                print(" CSV Value: ", record["description"])
                print()
                insert = f"insert into public.{table_name} ({name_component}_id, {name_component}_desc) values ({str(record['id'])}, '{escape_single_quotes(record['description'])}');"
                changes.append(insert)
                # Don't need down changes here because the down migration just drops the table

        else:
            is_first_change = True
            for record in data[table]:
                sql = f"""
                select {name_component}_id as id, {name_component}_desc as description
                from {table_name} where {name_component}_id = {str(record['id'])};
                """
                cursor.execute(sql)
                db_result = cursor.fetchone()
                if db_result:
                    # We have a record on file with this ID
                    if db_result["description"] == record["description"]:
                        # print(f"✅ Value \"{record['description']}\" with id {str(record['id'])} found in {table_name}")
                        pass
                    else:
                        print(
                            f"❌ Id {str(record['id'])} found in {table_name} has a description mismatch:"
                        )
                        print(" CSV Value: ", record["description"])
                        print(" DB Value: ", db_result["description"])
                        print()
                        update = f"update public.{table_name} set {name_component}_desc = '{escape_single_quotes(record['description'])}' where {name_component}_id = {str(record['id'])};"
                        if is_first_change:
                            changes.append(f"\n-- Changes to table {table_name}")
                            down_changes.append(f"\n-- Changes to table {table_name}")
                        changes.append(update)
                        update_down = f"update public.{table_name} set {name_component}_desc = '{escape_single_quotes(db_result['description'])}' where {name_component}_id = {str(record['id'])};"
                        down_changes.append(update_down)
                        is_first_change = False
                else:
                    # We do not have a record on file with this ID
                    # print(f"Value \"{record['description']}\" with id {str(record['id'])} not found in {table_name}")
                    print(f"❓ Id {str(record['id'])} not found in {table_name}")
                    print(" CSV Value: ", record["description"])
                    print()
                    insert = f"insert into public.{table_name} ({name_component}_id, {name_component}_desc) values ({str(record['id'])}, '{escape_single_quotes(record['description'])}');"
                    if is_first_change:
                        changes.append(f"\n-- Changes to table {table_name}")
                        down_changes.append(f"\n-- Changes to table {table_name}")
                    changes.append(insert)
                    insert_down = f"delete from public.{table_name} where {name_component}_id = {str(record['id'])};"
                    down_changes.append(insert_down)
                    is_first_change = False

    print("\n🛠️ Here are the changes to be made:\n")
    print("\n".join(changes).strip())

    with open("up_migration.sql", "w") as outfile:
        outfile.write("\n".join(changes).strip())

    with open("down_migration.sql", "w") as outfile_down:
        outfile_down.write("\n".join(down_changes).strip())


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="extract file path")
    args = parser.parse_args()
    file_path = args.input

    main(file_path)
3 changes: 3 additions & 0 deletions atd-toolbox/get_lookup_table_changes/requirements.txt
@@ -0,0 +1,3 @@
psycopg2==2.9.3
psycopg2_binary==2.9.5
python-dotenv==1.0.1
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__autonomous_level_engaged_lkp
  schema: public
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__autonomous_unit_lkp
  schema: public
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__inv_da_lkp
  schema: public
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__rpt_road_type_lkp
  schema: public
@@ -0,0 +1,3 @@
table:
  name: atd_txdot__trauma_centers_lkp
  schema: public