From 70e3d18c7078fafbd7440270c6c5219f31cdf3c7 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Thu, 16 Nov 2023 11:05:31 -0800 Subject: [PATCH 01/15] pipeline to load BlackCat API data --- .../external_table_a10.yml | 68 ++++++ .../external_table_a15.yml | 16 ++ .../external_table_a30.yml | 86 ++++++++ .../external_table_p10.yml | 74 +++++++ .../external_table_p20.yml | 56 +++++ .../external_table_p50.yml | 53 +++++ .../external_table_rr20_intercity.yml | 71 +++++++ .../external_table_rr20_rural.yml | 77 +++++++ .../external_table_rr20_urban_tribal.yml | 59 ++++++ .../external_table_ss60.yml | 16 ++ .../external_table_tam_narrative.yml | 16 ++ .../dags/ntd_report_validation/METADATA.yml | 18 ++ airflow/dags/ntd_report_validation/README.md | 9 + .../a10_submitted_for_ntd.yml | 7 + .../a15_submitted_for_ntd.yml | 7 + .../a30_submitted_for_ntd.yml | 7 + .../p10_submitted_for_ntd.yml | 7 + .../p20_submitted_for_ntd.yml | 7 + .../p50_submitted_for_ntd.yml | 7 + .../rr20_intercity_submitted_for_ntd.yml | 7 + .../rr20_rural_submitted_for_ntd.yml | 7 + .../rr20_urban_tribal_submitted_for_ntd.yml | 7 + .../ss60_submitted_for_ntd.yml | 7 + .../tam_narrative_submitted_for_ntd.yml | 7 + airflow/plugins/operators/__init__.py | 1 + airflow/plugins/operators/blackcat_to_gcs.py | 193 ++++++++++++++++++ 26 files changed, 890 insertions(+) create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_a10.yml create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_a15.yml create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_a30.yml create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_p10.yml create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_p20.yml create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_p50.yml create mode 100644 
airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_intercity.yml create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_rural.yml create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_urban_tribal.yml create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_ss60.yml create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_tam_narrative.yml create mode 100644 airflow/dags/ntd_report_validation/METADATA.yml create mode 100644 airflow/dags/ntd_report_validation/README.md create mode 100644 airflow/dags/ntd_report_validation/a10_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/a15_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/a30_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/p10_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/p20_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/p50_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/rr20_intercity_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/rr20_rural_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/rr20_urban_tribal_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/ss60_submitted_for_ntd.yml create mode 100644 airflow/dags/ntd_report_validation/tam_narrative_submitted_for_ntd.yml create mode 100644 airflow/plugins/operators/blackcat_to_gcs.py diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_a10.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_a10.yml new file mode 100644 index 0000000000..bf1d4cbabf --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_a10.yml @@ -0,0 +1,68 @@ +operator: 
operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.a10_ntdreportingstationsandmaintenance + LIMIT 1; +source_objects: + - "a10_NTDReportingStationsAndMaintenance/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.a10_ntdreportingstationsandmaintenance" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "a10_NTDReportingStationsAndMaintenance/" +schema_fields: + - name: api_report_id + type: STRING + mode: NULLABLE + - name: api_organization + type: STRING + mode: NULLABLE + - name: api_report_period + type: INTEGER + mode: NULLABLE + - name: api_report_status + type: STRING + mode: NULLABLE + - name: api_last_modified + type: TIMESTAMP + mode: NULLABLE + - name: id + type: STRING + mode: NULLABLE + - name: report_id + type: STRING + mode: NULLABLE + - name: service_mode + type: STRING + mode: NULLABLE + - name: pt_owned_by_service_provider + type: FLOAT64 + mode: NULLABLE + - name: pt_owned_by_public_agency + type: FLOAT64 + mode: NULLABLE + - name: pt_leased_by_public_agency + type: FLOAT64 + mode: NULLABLE + - name: pt_leased_by_service_provider + type: FLOAT64 + mode: NULLABLE + - name: do_owned + type: FLOAT64 + mode: NULLABLE + - name: do_leased_by_public_agency + type: FLOAT64 + mode: NULLABLE + - name: do_leased_from_private_entity + type: FLOAT64 + mode: NULLABLE + - name: last_modified_date + type: DATETIME + mode: NULLABLE + + + diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_a15.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_a15.yml new file mode 100644 index 0000000000..1e327030eb --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_a15.yml @@ -0,0 +1,16 @@ +operator: operators.ExternalTable +bucket: 
gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.a15_ntdtransitassetmanagement + LIMIT 1; +source_objects: + - "a15_NTDTransitAssetManagementA15/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.a15_ntdtransitassetmanagement" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "a15_NTDTransitAssetManagementA15/" diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_a30.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_a30.yml new file mode 100644 index 0000000000..beefa1982d --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_a30.yml @@ -0,0 +1,86 @@ +operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.a30_ntdassetandresourceinfo + LIMIT 1; +source_objects: + - "a30_NTDAssetAndResourceInfo/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.a30_ntdassetandresourceinfo" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "a30_NTDAssetAndResourceInfo/" +schema_fields: + - name: api_report_id + type: STRING + mode: NULLABLE + - name: api_organization + type: STRING + mode: NULLABLE + - name: api_report_period + type: INTEGER + mode: NULLABLE + - name: api_report_status + type: STRING + mode: NULLABLE + - name: api_last_modified + type: TIMESTAMP + mode: NULLABLE + - name: id + type: STRING + mode: NULLABLE + - name: vehicle_id + type: STRING + mode: NULLABLE + - name: vehicle_status + type: STRING + mode: NULLABLE + - name: vin + type: STRING + mode: NULLABLE + - name: ntdid + type: STRING + mode: NULLABLE + - name: ada_access + type: STRING + mode: NULLABLE + 
- name: vehicle_type + type: STRING + mode: NULLABLE + - name: fuel_type + type: STRING + mode: NULLABLE + - name: average_estimated_service_years_when_new + type: INTEGER + mode: NULLABLE + - name: average_expiration_years_when_new + type: INTEGER + mode: NULLABLE + - name: vehicle_year + type: INTEGER + mode: NULLABLE + - name: useful_life_years_remaining + type: INTEGER + mode: NULLABLE + - name: vehicle_length + type: STRING + mode: NULLABLE + - name: seating_capacity + type: STRING + mode: NULLABLE + - name: ownership_type + type: STRING + mode: NULLABLE + - name: modes_operated_display_text + type: STRING + mode: NULLABLE + - name: modes_operated_full_text + type: STRING + mode: NULLABLE + - name: last_modified_date + type: DATETIME + mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_p10.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_p10.yml new file mode 100644 index 0000000000..fdb6a5aa45 --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_p10.yml @@ -0,0 +1,74 @@ +operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.p10_ntdreporterbasicinfo + LIMIT 1; +source_objects: + - "p10_NTDReportingP10/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.p10_ntdreporterbasicinfo" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "p10_NTDReportingP10/" +schema_fields: + - name: api_report_id + type: STRING + mode: NULLABLE + - name: api_organization + type: STRING + mode: NULLABLE + - name: api_report_period + type: INTEGER + mode: NULLABLE + - name: api_report_status + type: STRING + mode: NULLABLE + - name: api_last_modified + type: TIMESTAMP + mode: NULLABLE + - name: id + type: STRING + mode: NULLABLE + - name: 
report_id + type: STRING + mode: NULLABLE + - name: org_id + type: STRING + mode: NULLABLE + - name: user_id + type: STRING + mode: NULLABLE + - name: first_name + type: STRING + mode: NULLABLE + - name: last_name + type: STRING + mode: NULLABLE + - name: full_name + type: STRING + mode: NULLABLE + - name: text + type: STRING + mode: NULLABLE + - name: value + type: STRING + mode: NULLABLE + - name: group + type: STRING + mode: NULLABLE + - name: bool_value + type: BOOL + mode: NULLABLE + - name: primary_phone + type: STRING + mode: NULLABLE + - name: email + type: STRING + mode: NULLABLE + - name: last_modified_date + type: DATETIME + mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_p20.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_p20.yml new file mode 100644 index 0000000000..6fd9370b8a --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_p20.yml @@ -0,0 +1,56 @@ +operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.p20_ntdreportermodes + LIMIT 1; +source_objects: + - "p20_NTDReportingP20/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.p20_ntdreportermodes" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "p20_NTDReportingP20/" +schema_fields: + - name: api_report_id + type: STRING + mode: NULLABLE + - name: api_organization + type: STRING + mode: NULLABLE + - name: api_report_period + type: INTEGER + mode: NULLABLE + - name: api_report_status + type: STRING + mode: NULLABLE + - name: api_last_modified + type: TIMESTAMP + mode: NULLABLE + - name: id + type: STRING + mode: NULLABLE + - name: report_id + type: STRING + mode: NULLABLE + - name: service_mode + type: STRING + mode: NULLABLE + - name: type_of_service 
+ type: STRING + mode: NULLABLE + - name: commitment_date + type: DATETIME + mode: NULLABLE + - name: start_date + type: DATETIME + mode: NULLABLE + - name: end_date + type: DATETIME + mode: NULLABLE + - name: last_modified_date + type: DATETIME + mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_p50.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_p50.yml new file mode 100644 index 0000000000..78c6cf4dde --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_p50.yml @@ -0,0 +1,53 @@ +operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.p50_ntdreportergtfs + LIMIT 1; +source_objects: + - "p50_NTDReportingP50/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.p50_ntdreportergtfs" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "p50_NTDReportingP50/" +schema_fields: + - name: api_report_id + type: STRING + mode: NULLABLE + - name: api_organization + type: STRING + mode: NULLABLE + - name: api_report_period + type: INTEGER + mode: NULLABLE + - name: api_report_status + type: STRING + mode: NULLABLE + - name: api_last_modified + type: TIMESTAMP + mode: NULLABLE + - name: id + type: STRING + mode: NULLABLE + - name: report_id + type: STRING + mode: NULLABLE + - name: mode + type: STRING + mode: NULLABLE + - name: type + type: STRING + mode: NULLABLE + - name: web_link + type: STRING + mode: NULLABLE + - name: file_path + type: STRING + mode: NULLABLE + - name: last_modified_date + type: DATETIME + mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_intercity.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_intercity.yml new file mode 
100644 index 0000000000..e52cbeabba --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_intercity.yml @@ -0,0 +1,71 @@ +operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.rr20_intercity + LIMIT 1; +source_objects: + - "rr20_NTDReportingRR20_Intercity/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.rr20_intercity" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "rr20_NTDReportingRR20_Intercity/" +schema_fields: + - name: api_report_id + type: STRING + mode: NULLABLE + - name: api_organization + type: STRING + mode: NULLABLE + - name: api_report_period + type: INTEGER + mode: NULLABLE + - name: api_report_status + type: STRING + mode: NULLABLE + - name: api_last_modified + type: TIMESTAMP + mode: NULLABLE + - name: id + type: STRING + mode: NULLABLE + - name: item_id + type: STRING + mode: NULLABLE + - name: report_id + type: STRING + mode: NULLABLE + - name: item + type: STRING + mode: NULLABLE + - name: type + type: STRING + mode: NULLABLE + - name: operations_expended + type: STRING + mode: NULLABLE + - name: capital_expended + type: STRING + mode: NULLABLE + - name: description + type: STRING + mode: NULLABLE + - name: annual_vehicle_rev_miles + type: FLOAT64 + mode: NULLABLE + - name: regular_unlinked_passenger_trips + type: INTEGER + mode: NULLABLE + - name: last_modified_date + type: DATETIME + mode: NULLABLE + + + + + + diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_rural.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_rural.yml new file mode 100644 index 0000000000..bf85034c82 --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_rural.yml @@ -0,0 +1,77 @@ 
+operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.rr20_rural + LIMIT 1; +source_objects: + - "rr20_NTDReportingRR20_Rural/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.rr20_rural" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "rr20_NTDReportingRR20_Rural/" +schema_fields: + - name: api_report_id + type: STRING + mode: NULLABLE + - name: api_organization + type: STRING + mode: NULLABLE + - name: api_report_period + type: INTEGER + mode: NULLABLE + - name: api_report_status + type: STRING + mode: NULLABLE + - name: api_last_modified + type: TIMESTAMP + mode: NULLABLE + - name: id + type: STRING + mode: NULLABLE + - name: report_id + type: STRING + mode: NULLABLE + - name: item + type: STRING + mode: NULLABLE + - name: REVENUE + type: FLOAT64 + mode: NULLABLE + - name: css_class + type: STRING + mode: NULLABLE + - name: operations_expended + type: STRING + mode: NULLABLE + - name: capital_expended + type: STRING + mode: NULLABLE + - name: description + type: STRING + mode: NULLABLE + - name: annual_vehicle_rev_miles + type: FLOAT64 + mode: NULLABLE + - name: annual_vehicle_rev_hours + type: INTEGER + mode: NULLABLE + - name: annual_unlinked_pass_trips + type: FLOAT64 + mode: NULLABLE + - name: annual_vehicle_max_service + type: INTEGER + mode: NULLABLE + - name: sponsored_service_upt + type: INTEGER + mode: NULLABLE + - name: quantity + type: INTEGER + mode: NULLABLE + - name: last_modified_date + type: DATETIME + mode: NULLABLE \ No newline at end of file diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_urban_tribal.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_urban_tribal.yml new file mode 100644 index 0000000000..1522c542ab --- /dev/null +++ 
b/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_urban_tribal.yml @@ -0,0 +1,59 @@ +operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.rr20_urban_tribal + LIMIT 1; +source_objects: + - "rr20_NTDReportingRR20_Urban_Tribal/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.rr20_urban_tribal" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "rr20_NTDReportingRR20_Urban_Tribal/" +schema_fields: + - name: api_report_id + type: STRING + mode: NULLABLE + - name: api_organization + type: STRING + mode: NULLABLE + - name: api_report_period + type: INTEGER + mode: NULLABLE + - name: api_report_status + type: STRING + mode: NULLABLE + - name: api_last_modified + type: TIMESTAMP + mode: NULLABLE + - name: id + type: STRING + mode: NULLABLE + - name: item_id + type: STRING + mode: NULLABLE + - name: report_id + type: STRING + mode: NULLABLE + - name: item + type: STRING + mode: NULLABLE + - name: type + type: STRING + mode: NULLABLE + - name: operations_expended + type: STRING + mode: NULLABLE + - name: capital_expended + type: STRING + mode: NULLABLE + - name: description + type: STRING + mode: NULLABLE + - name: last_modified_date + type: DATETIME + mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_ss60.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_ss60.yml new file mode 100644 index 0000000000..576a286c71 --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_ss60.yml @@ -0,0 +1,16 @@ +operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.ss60_safety + LIMIT 1; +source_objects: + 
- "ss60_SS60/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.ss60_safety" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "ss60_SS60/" diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_tam_narrative.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_tam_narrative.yml new file mode 100644 index 0000000000..27e869a3f6 --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_tam_narrative.yml @@ -0,0 +1,16 @@ +operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.ntd_tamnarrative + LIMIT 1; +source_objects: + - "ntd_NTDReportingTAMNarrative/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.ntd_tamnarrative" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "ntd_NTDReportingTAMNarrative/" diff --git a/airflow/dags/ntd_report_validation/METADATA.yml b/airflow/dags/ntd_report_validation/METADATA.yml new file mode 100644 index 0000000000..d8a59f4b6e --- /dev/null +++ b/airflow/dags/ntd_report_validation/METADATA.yml @@ -0,0 +1,19 @@ +description: "Process raw files from a GCS bucket (NTD reports from BlackCat API) directly into BigQuery" +schedule_interval: "0 10 * * 1" #10 am every Monday +tags: + - ntd + - blackcat +default_args: + owner: airflow + depends_on_past: False + start_date: "2023-10-02" + catchup: False + email: + - "kim.engie@slalom.com" + - "christian.suyat@dot.ca.gov" + - "katrina.kaiser@dot.ca.gov" + email_on_failure: True + pool: default_pool + concurrency: 50 +wait_for_defaults: + timeout: 3600 \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/README.md 
b/airflow/dags/ntd_report_validation/README.md new file mode 100644 index 0000000000..652eb0e2a6 --- /dev/null +++ b/airflow/dags/ntd_report_validation/README.md @@ -0,0 +1,9 @@ +# `ntd_report_validation` + +Type: [Scheduled](https://docs.calitp.org/data-infra/airflow/dags-maintenance.html) + +This DAG orchestrates the publishing and storing of data, in the form of NTD report submissions, first pushing API data into Google Cloud Storage in the bucket `calitp-ntd-report-validation`. + +Another DAG (part of the `create_external_tables` existing DAG) reads the GCS data into BigQuery in the Cal-ITP data warehouse. The job will take the most recent file of each report type (which has all submitted reports by Caltrans 5311 subrecipients) and publish it into BigQuery `external` tables, if it is not yet there. This job uses the Cal-ITP existing infrastructure for creating external tables, outlined [here](https://docs.calitp.org/data-infra/architecture/data.html). + +In the event of failure, the job can be rerun without backfilling. 
\ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/a10_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/a10_submitted_for_ntd.yml new file mode 100644 index 0000000000..c33a1120e7 --- /dev/null +++ b/airflow/dags/ntd_report_validation/a10_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDReportingStationsAndMaintenance" +form: "A-10" +bq_table_name: "ntdreportingstationsandmaintenance" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/a15_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/a15_submitted_for_ntd.yml new file mode 100644 index 0000000000..ff58fc5b31 --- /dev/null +++ b/airflow/dags/ntd_report_validation/a15_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDTransitAssetManagementA15" +form: "A-15" +bq_table_name: "ntdtransitassetmanagement" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/a30_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/a30_submitted_for_ntd.yml new file mode 100644 index 0000000000..2c6176942b --- /dev/null +++ b/airflow/dags/ntd_report_validation/a30_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDAssetAndResourceInfo" +form: "A-30" +bq_table_name: "ntdassetandresourceinfo" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/p10_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/p10_submitted_for_ntd.yml new file 
mode 100644 index 0000000000..75694c6f3f --- /dev/null +++ b/airflow/dags/ntd_report_validation/p10_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDReportingP10" +form: "P-10" +bq_table_name: "ntdreporterbasicinfo" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/p20_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/p20_submitted_for_ntd.yml new file mode 100644 index 0000000000..de197b31c9 --- /dev/null +++ b/airflow/dags/ntd_report_validation/p20_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDReportingP20" +form: "P-20" +bq_table_name: "ntdreportermodes" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/p50_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/p50_submitted_for_ntd.yml new file mode 100644 index 0000000000..46778e2ac2 --- /dev/null +++ b/airflow/dags/ntd_report_validation/p50_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDReportingP50" +form: "P-50" +bq_table_name: "ntdreportergtfs" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/rr20_intercity_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/rr20_intercity_submitted_for_ntd.yml new file mode 100644 index 0000000000..34656c2aaf --- /dev/null +++ b/airflow/dags/ntd_report_validation/rr20_intercity_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: 
"gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDReportingRR20_Intercity" +form: "RR-20" +bq_table_name: "intercity" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/rr20_rural_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/rr20_rural_submitted_for_ntd.yml new file mode 100644 index 0000000000..3bdb2d2137 --- /dev/null +++ b/airflow/dags/ntd_report_validation/rr20_rural_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDReportingRR20_Rural" +form: "RR-20" +bq_table_name: "rural" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/rr20_urban_tribal_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/rr20_urban_tribal_submitted_for_ntd.yml new file mode 100644 index 0000000000..3c698de609 --- /dev/null +++ b/airflow/dags/ntd_report_validation/rr20_urban_tribal_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDReportingRR20_Urban_Tribal" +form: "RR-20" +bq_table_name: "urban_tribal" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/ss60_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/ss60_submitted_for_ntd.yml new file mode 100644 index 0000000000..13d7d04ddd --- /dev/null +++ b/airflow/dags/ntd_report_validation/ss60_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "SS60" +form: 
"SS-60" +bq_table_name: "safety" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/tam_narrative_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/tam_narrative_submitted_for_ntd.yml new file mode 100644 index 0000000000..fcdb3fbcac --- /dev/null +++ b/airflow/dags/ntd_report_validation/tam_narrative_submitted_for_ntd.yml @@ -0,0 +1,7 @@ +operator: operators.BlackCatApiToGCSOperator + +bucket: "gs://calitp-ntd-report-validation" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" +api_tablename: "NTDReportingTAMNarrative" +form: "NTD" +bq_table_name: "tamnarrative" \ No newline at end of file diff --git a/airflow/plugins/operators/__init__.py b/airflow/plugins/operators/__init__.py index 24fd5fd7ce..39271c95c8 100644 --- a/airflow/plugins/operators/__init__.py +++ b/airflow/plugins/operators/__init__.py @@ -6,3 +6,4 @@ from operators.littlepay_raw_sync import LittlepayRawSync from operators.littlepay_to_jsonl import LittlepayToJSONL from operators.pod_operator import PodOperator +from operators.blackcat_to_gcs import BlackCatApiToGCSOperator diff --git a/airflow/plugins/operators/blackcat_to_gcs.py b/airflow/plugins/operators/blackcat_to_gcs.py new file mode 100644 index 0000000000..b6927aceea --- /dev/null +++ b/airflow/plugins/operators/blackcat_to_gcs.py @@ -0,0 +1,193 @@ +from calitp_data_infra.storage import get_fs, make_name_bq_safe +from airflow.models import BaseOperator +from pydantic import BaseModel +from typing import Optional +import pandas as pd +import pendulum +import requests +import logging +import gzip +import os +import re + +def write_to_log(logfilename): + ''' + Creates a logger object that outputs to a log file, to the filename specified, + and also streams to console. 
+ ''' + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + formatter = logging.Formatter(f'%(asctime)s:%(levelname)s: %(message)s', + datefmt='%y-%m-%d %H:%M:%S') + file_handler = logging.FileHandler(logfilename) + file_handler.setFormatter(formatter) + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(formatter) + + if not logger.hasHandlers(): + logger.addHandler(file_handler) + logger.addHandler(stream_handler) + + return logger + + +def camel_to_snake(name): + '''Converts Snake case to underscore separation for renaming columns; + VehicleStatus becomes vehicle_status and + can handle acroynms like ADAAccess, which becomes ada_access''' + name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() + + +class BlackCatApiExtract(BaseModel): + api_url: str + form: str + api_tablename: str + bq_table_name: str + data: Optional[pd.DataFrame] + logger: Optional[logging.Logger] + extract_time: Optional[pendulum.DateTime] + + logger = write_to_log('load_bc_apidata_output.log') + extract_time = pendulum.now() + + # pydantic doesn't know dataframe type + # see https://stackoverflow.com/a/69200069 + class Config: + arbitrary_types_allowed = True + + def fetch_from_bc_api(self): + """Download a BlackCat table as a DataFrame. + + Note that BlackCat API reports have rows structured as follows: + [{'ReportId': , + 'Organization': , + 'ReportPeriod': , + 'ReportStatus': , + 'ReportLastModifiedDate': , + '': {'Data': [{colname: value, ...}, {colname: value, ...} ...]}}, + {'ReportId': , ...etc. to the next organization}] + + This function applies renames in the following order. + 1. rename column names from snakecase to names utilizing underscores + 2. rename fields + 3. apply column prefix (to columns not renamed by 1 or 2) + """ + + self.logger.info( + f"Downloading BlackCat data for {self.extract_time.format('YYYY')}_{self.bq_table_name}." 
+ ) + response = requests.get(self.api_url, verify=False) + blob = response.json() + + org_data = [] + + # Cycling through and processing each org's data + for x in blob: + report_id = x.get('ReportId') + org = x.get('Organization') + period = x.get('ReportPeriod') + status = x.get('ReportStatus') + last_mod = (pendulum.from_format(x.get('ReportLastModifiedDate'), 'MM/DD/YYYY HH:mm:ss A') + .in_tz('America/Los_Angeles') + .set(tz='UTC')) + iso = last_mod.to_iso8601_string() + + org_info_values = {'api_report_id': report_id, 'api_organization': org, + 'api_report_period': period, 'api_report_status': status, + 'api_last_modified': iso} + org_info_df = pd.DataFrame([org_info_values]) + + table_json = x[self.api_tablename]['Data'] + # checks for nested json entries, replaces any with only the 'Text' value from nested json. + for x in table_json: + for k,v in x.items(): + if type(v) is dict: + x[k] = x[k]['Text'] + raw_df = pd.DataFrame.from_dict(table_json) + raw_df.rename(columns=lambda c: camel_to_snake(c), inplace=True) + whole_df = pd.concat([org_info_df, raw_df], axis=1).sort_values(by='api_organization') + + # Only the 1st row of data in org_info_df is filled, other rows have NAs. + # Here we fill in the rest with the values + whole_df = whole_df.fillna(value=org_info_values) + org_data.append(whole_df) + + raw_df = pd.concat(org_data) + raw_df.rename(columns=lambda c: camel_to_snake(c), inplace=True) + + self.data = raw_df.rename(make_name_bq_safe, axis="columns") + self.logger.info( + f"Downloaded {self.extract_time.format('YYYY')}_{self.bq_table_name} data with {len(self.data)} rows!" + ) + + def make_hive_path(self, form: str, bucket: str): + if not self.extract_time: + raise ValueError( + "An extract time must be set before a hive path can be generated." 
+ ) + bq_form_name = ( + str.lower(form).replace("-", "") + ) + return os.path.join( + bucket, + f"{bq_form_name}_{self.api_tablename}", + f"dt={self.extract_time.to_date_string()}", + f"ts={self.extract_time.to_iso8601_string()}", + f"{bq_form_name}_{self.bq_table_name}.jsonl.gz", + ) + + def save_to_gcs(self, fs, bucket): + hive_path = self.make_hive_path(self.form, bucket) + self.logger.info(f"Uploading to GCS at {hive_path}") + if len(self.data) == 0: + self.logger.info(f"There is no data for {self.api_tablename}, not saving anything. Pipeline exiting.") + pass + else: + fs.pipe( + hive_path, + gzip.compress(self.data.to_json(orient="records", lines=True).encode()), + ) + return hive_path + + +class BlackCatApiToGCSOperator(BaseOperator): + template_fields = ("bucket",) + + def __init__( + self, + bucket, + api_url, + form, + api_tablename, + bq_table_name, + **kwargs, + ): + """An operator that downloads data from a BlackCat API + and saves it as a JSON file hive-partitioned by date in Google Cloud + Storage (GCS). + + Args: + bucket (str): GCS bucket where the scraped BlackCat report will be saved. + api_url (str): The URL to hit that gets the data. + api_tablename (str): The table that should be extracted from the BlackCat API. + MUST MATCH THE API JSON EXACTLY + bq_table_name (str): The table name that will be given in BigQuery. Appears in the GCS bucket path and the filename. + form: the NTD form that this report belongs to. E.g., RR-20, A-10, etc. 
+ """ + self.bucket = bucket + # Instantiating an instance of the BlackCatApiExtract() + self.extract = BlackCatApiExtract( + api_url=api_url, + form=form, + api_tablename=api_tablename, + bq_table_name=bq_table_name, + ) + + super().__init__(**kwargs) + + def execute(self, **kwargs): + fs = get_fs() + self.extract.fetch_from_bc_api() + # inserts into xcoms + return self.extract.save_to_gcs(fs, self.bucket) From c0c8ae86af30c5ff66926ec5929a18b48aebdb3e Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Wed, 22 Nov 2023 10:27:23 -0800 Subject: [PATCH 02/15] pull API data into 1 table, first dbt models --- .../external_table_all_ntdreports.yml | 19 ++ .../a10_submitted_for_ntd.yml | 7 - .../a15_submitted_for_ntd.yml | 7 - .../a30_submitted_for_ntd.yml | 7 - ...ntd.yml => all_2023_submitted_for_ntd.yml} | 8 +- .../p10_submitted_for_ntd.yml | 7 - .../p50_submitted_for_ntd.yml | 7 - .../rr20_intercity_submitted_for_ntd.yml | 7 - .../rr20_rural_submitted_for_ntd.yml | 7 - .../rr20_urban_tribal_submitted_for_ntd.yml | 7 - .../ss60_submitted_for_ntd.yml | 7 - .../tam_narrative_submitted_for_ntd.yml | 7 - .../int_ntd_rr20_service_alldata.sql | 77 ++++++ .../int_ntd_rr20_service_ratios.py | 71 ++++++ .../ntd_validation/int_ntd_validation.yml | 15 ++ .../ntd_validation/_mart_ntd_validation.yml | 4 + .../fct_ntd_rr20_service_checks.py | 223 ++++++++++++++++++ .../ntd_validation/_src_api_externaltable.yml | 21 ++ .../stg_2022_rr20_exp_by_mode.sql | 5 + .../stg_2022_rr20_financial.sql | 5 + .../ntd_validation/stg_2022_rr20_service.sql | 5 + .../staging/ntd_validation/stg_2023_a10.sql | 18 ++ .../ntd_validation/stg_2023_rr20_rural.sql | 23 ++ .../stg_2023_rr20_urban_tribal.sql | 15 ++ .../ntd_validation/stg_ntd_subrecipients.sql | 3 + 25 files changed, 508 insertions(+), 74 deletions(-) create mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml delete mode 100644 airflow/dags/ntd_report_validation/a10_submitted_for_ntd.yml delete 
mode 100644 airflow/dags/ntd_report_validation/a15_submitted_for_ntd.yml delete mode 100644 airflow/dags/ntd_report_validation/a30_submitted_for_ntd.yml rename airflow/dags/ntd_report_validation/{p20_submitted_for_ntd.yml => all_2023_submitted_for_ntd.yml} (52%) delete mode 100644 airflow/dags/ntd_report_validation/p10_submitted_for_ntd.yml delete mode 100644 airflow/dags/ntd_report_validation/p50_submitted_for_ntd.yml delete mode 100644 airflow/dags/ntd_report_validation/rr20_intercity_submitted_for_ntd.yml delete mode 100644 airflow/dags/ntd_report_validation/rr20_rural_submitted_for_ntd.yml delete mode 100644 airflow/dags/ntd_report_validation/rr20_urban_tribal_submitted_for_ntd.yml delete mode 100644 airflow/dags/ntd_report_validation/ss60_submitted_for_ntd.yml delete mode 100644 airflow/dags/ntd_report_validation/tam_narrative_submitted_for_ntd.yml create mode 100644 warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql create mode 100644 warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py create mode 100644 warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml create mode 100644 warehouse/models/mart/ntd_validation/_mart_ntd_validation.yml create mode 100644 warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py create mode 100644 warehouse/models/staging/ntd_validation/_src_api_externaltable.yml create mode 100644 warehouse/models/staging/ntd_validation/stg_2022_rr20_exp_by_mode.sql create mode 100644 warehouse/models/staging/ntd_validation/stg_2022_rr20_financial.sql create mode 100644 warehouse/models/staging/ntd_validation/stg_2022_rr20_service.sql create mode 100644 warehouse/models/staging/ntd_validation/stg_2023_a10.sql create mode 100644 warehouse/models/staging/ntd_validation/stg_2023_rr20_rural.sql create mode 100644 warehouse/models/staging/ntd_validation/stg_2023_rr20_urban_tribal.sql create mode 100644 warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql diff 
--git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml new file mode 100644 index 0000000000..aa98e6e59a --- /dev/null +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml @@ -0,0 +1,19 @@ +operator: operators.ExternalTable +bucket: gs://calitp-ntd-report-validation +prefix_bucket: true +post_hook: | + SELECT * + FROM `{{ get_project_id() }}`.external_blackcat.all_2023_ntdreports + LIMIT 1; +source_objects: + - "all_2023_NTDReporting/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.all_2023_ntdreports" +source_format: NEWLINE_DELIMITED_JSON +use_bq_client: true +hive_options: + mode: AUTO + require_partition_filter: false + source_uri_prefix: "all_2023_NTDReporting/" + + + diff --git a/airflow/dags/ntd_report_validation/a10_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/a10_submitted_for_ntd.yml deleted file mode 100644 index c33a1120e7..0000000000 --- a/airflow/dags/ntd_report_validation/a10_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "NTDReportingStationsAndMaintenance" -form: "A-10" -bq_table_name: "ntdreportingstationsandmaintenance" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/a15_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/a15_submitted_for_ntd.yml deleted file mode 100644 index ff58fc5b31..0000000000 --- a/airflow/dags/ntd_report_validation/a15_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" 
-api_tablename: "NTDTransitAssetManagementA15" -form: "A-15" -bq_table_name: "ntdtransitassetmanagement" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/a30_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/a30_submitted_for_ntd.yml deleted file mode 100644 index 2c6176942b..0000000000 --- a/airflow/dags/ntd_report_validation/a30_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "NTDAssetAndResourceInfo" -form: "A-30" -bq_table_name: "ntdassetandresourceinfo" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/p20_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/all_2023_submitted_for_ntd.yml similarity index 52% rename from airflow/dags/ntd_report_validation/p20_submitted_for_ntd.yml rename to airflow/dags/ntd_report_validation/all_2023_submitted_for_ntd.yml index de197b31c9..c01b9a0515 100644 --- a/airflow/dags/ntd_report_validation/p20_submitted_for_ntd.yml +++ b/airflow/dags/ntd_report_validation/all_2023_submitted_for_ntd.yml @@ -1,7 +1,7 @@ operator: operators.BlackCatApiToGCSOperator -bucket: "gs://calitp-ntd-report-validation" +bucket: "gs://test-calitp-ntd-report-validation" api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "NTDReportingP20" -form: "P-20" -bq_table_name: "ntdreportermodes" \ No newline at end of file +api_tablename: "2023_NTDReporting" +form: "all" +bq_table_name: "2023_ntdreports" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/p10_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/p10_submitted_for_ntd.yml deleted file mode 100644 index 75694c6f3f..0000000000 --- a/airflow/dags/ntd_report_validation/p10_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: 
operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "NTDReportingP10" -form: "P-10" -bq_table_name: "ntdreporterbasicinfo" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/p50_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/p50_submitted_for_ntd.yml deleted file mode 100644 index 46778e2ac2..0000000000 --- a/airflow/dags/ntd_report_validation/p50_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "NTDReportingP50" -form: "P-50" -bq_table_name: "ntdreportergtfs" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/rr20_intercity_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/rr20_intercity_submitted_for_ntd.yml deleted file mode 100644 index 34656c2aaf..0000000000 --- a/airflow/dags/ntd_report_validation/rr20_intercity_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "NTDReportingRR20_Intercity" -form: "RR-20" -bq_table_name: "intercity" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/rr20_rural_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/rr20_rural_submitted_for_ntd.yml deleted file mode 100644 index 3bdb2d2137..0000000000 --- a/airflow/dags/ntd_report_validation/rr20_rural_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: 
"https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "NTDReportingRR20_Rural" -form: "RR-20" -bq_table_name: "rural" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/rr20_urban_tribal_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/rr20_urban_tribal_submitted_for_ntd.yml deleted file mode 100644 index 3c698de609..0000000000 --- a/airflow/dags/ntd_report_validation/rr20_urban_tribal_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "NTDReportingRR20_Urban_Tribal" -form: "RR-20" -bq_table_name: "urban_tribal" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/ss60_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/ss60_submitted_for_ntd.yml deleted file mode 100644 index 13d7d04ddd..0000000000 --- a/airflow/dags/ntd_report_validation/ss60_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "SS60" -form: "SS-60" -bq_table_name: "safety" \ No newline at end of file diff --git a/airflow/dags/ntd_report_validation/tam_narrative_submitted_for_ntd.yml b/airflow/dags/ntd_report_validation/tam_narrative_submitted_for_ntd.yml deleted file mode 100644 index fcdb3fbcac..0000000000 --- a/airflow/dags/ntd_report_validation/tam_narrative_submitted_for_ntd.yml +++ /dev/null @@ -1,7 +0,0 @@ -operator: operators.BlackCatApiToGCSOperator - -bucket: "gs://calitp-ntd-report-validation" -api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "NTDReportingTAMNarrative" -form: "NTD" -bq_table_name: "tamnarrative" 
\ No newline at end of file diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql new file mode 100644 index 0000000000..ff65dcd991 --- /dev/null +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql @@ -0,0 +1,77 @@ +------ +--- Compiles data for RR-20 Service checks from all years into one table for future computation +------ + +--- The 2022 data was *not* from the API and so formatted differently +--- We are *assuming* that data in 2024 and onwards will be the same format as 2023 +--- If you get errors in 2024, check which columns may differ and read errors carefully. + +---TO DO: insert parameter for loop, for each year, do what 2023 is doing, +--- and at the end, add another union statement +with data_2023 as ( + select + organization, + api_report_period as fiscal_year, + item as mode, + description as operating_capital, + CASE + WHEN description = "Operating Expenses" THEN operations_expended + WHEN description = "Capital Expenses" THEN capital_expended + ELSE Null + END as Total_Annual_Expenses_By_Mode, + annual_vehicle_rev_miles as Annual_VRM, + annual_vehicle_rev_hours as Annual_VRH, + annual_unlinked_pass_trips as Annual_UPT, + sponsored_service_upt as Sponsored_UPT, + annual_vehicle_max_service as VOMX + from {{ ref('stg_2023_rr20_rural') }} + WHERE type = "Expenses by Mode" +), + +service2022 as ( + select + Organization_Legal_Name as organization, + Fiscal_Year as fiscal_year, + Mode as mode, + Annual_VRM, + Annual_VRH, + Annual_UPT, + Sponsored_UPT, + VOMX + from {{ ref('stg_2022_rr20_service') }} +), + +expenses2022 as ( + select + Organization_Legal_Name as organization, + Fiscal_Year as fiscal_year, + Operating_Capital as operating_capital, + Mode as mode, + Total_Annual_Expenses_By_Mode + FROM {{ ref('stg_2022_rr20_exp_by_mode') }} +), + +all_2022 as ( + select service2022.organization, + 
service2022.fiscal_year, + service2022.mode, + expenses2022.operating_capital, + expenses2022.Total_Annual_Expenses_By_Mode, + service2022.Annual_VRM, + service2022.Annual_VRH, + service2022.Annual_UPT, + service2022.Sponsored_UPT, + service2022.VOMX +from service2022 +FULL OUTER JOIN expenses2022 + ON service2022.organization = expenses2022.organization + AND service2022.fiscal_year = expenses2022.fiscal_year + AND service2022.mode = expenses2022.mode +) + +select * FROM all_2022 + +UNION ALL + +select * from data_2023 + diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py new file mode 100644 index 0000000000..24f89bc8b2 --- /dev/null +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py @@ -0,0 +1,71 @@ + +import pyspark.sql.functions as F +import pandas as pd +import logging +import pyspark + + +def write_to_log(logfilename): + ''' + Creates a logger object that outputs to a log file, to the filename specified, + and also streams to console. 
+ ''' + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + formatter = logging.Formatter(f'%(asctime)s:%(levelname)s: %(message)s', + datefmt='%y-%m-%d %H:%M:%S') + file_handler = logging.FileHandler(logfilename) + file_handler.setFormatter(formatter) + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(formatter) + + if not logger.hasHandlers(): + logger.addHandler(file_handler) + logger.addHandler(stream_handler) + + return logger + +def make_ratio_cols(df, numerator, denominator, col_name, logger, operation="sum"): + if col_name is not None: + # If a user specify a column name, use it + # Raise error if the column already exists + if col_name in df.columns: + logger.info(f"Dataframe already has column '{col_name}'") + raise ValueError(f"Dataframe already has column '{col_name}'") + + else: + _col_name = col_name + + if operation == "sum": + df = (df.groupby(['organization','mode', 'fiscal_year']) + .apply(lambda x: x.assign(**{_col_name: + lambda x: x[numerator].sum() / x[denominator]})) + ) + # else do not sum the numerator columns + else: + df = (df.groupby(['organization','mode', 'fiscal_year']) + .apply(lambda x: x.assign(**{_col_name: + lambda x: x[numerator] / x[denominator]})) + ) + return df + + +def model(dbt, session): + # Set up the logger object + logger = write_to_log('rr20_servicechecks_log.log') + + #Load data from BigQuery - pass in the dbt model that we draw from. 
+ allyears = dbt.ref("int_ntd_rr20_service_alldata") + allyears = allyears.toPandas() + + # Calculate needed ratios, added as new columns + numeric_columns = allyears.select_dtypes(include=['number']).columns + allyears[numeric_columns] = allyears[numeric_columns].fillna(0) + + allyears = make_ratio_cols(allyears, 'Total_Annual_Expenses_By_Mode', 'Annual_VRH', 'cost_per_hr', logger) + allyears = make_ratio_cols(allyears, 'Annual_VRM', 'VOMX', 'miles_per_veh', logger) + allyears = make_ratio_cols(allyears, 'Total_Annual_Expenses_By_Mode', 'Annual_UPT', 'fare_rev_per_trip', logger) + allyears = make_ratio_cols(allyears, 'Annual_VRM', 'Annual_VRH', 'rev_speed', logger, operation = "mean") + allyears = make_ratio_cols(allyears, 'Annual_UPT', 'Annual_VRH', 'trips_per_hr', logger, operation = "mean") + + return allyears \ No newline at end of file diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml b/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml new file mode 100644 index 0000000000..d5affd54f9 --- /dev/null +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml @@ -0,0 +1,15 @@ +version: 2 + +models: + - name: int_rr20_financial + description: | + the RR-20 data that pertains to financial reporting. 
+ # tests: + # - dbt_utils.expression_is_true: + # expression: 'status != {{ guidelines_to_be_assessed_status() }}' + # columns: + - name: int_ntd_rr20_service_ratios + description: | + makes ratios for validation checks + config: + materialized: table \ No newline at end of file diff --git a/warehouse/models/mart/ntd_validation/_mart_ntd_validation.yml b/warehouse/models/mart/ntd_validation/_mart_ntd_validation.yml new file mode 100644 index 0000000000..43ae3000c4 --- /dev/null +++ b/warehouse/models/mart/ntd_validation/_mart_ntd_validation.yml @@ -0,0 +1,4 @@ +version: 2 + +models: + - name: fct_ntd_rr20_service_checks \ No newline at end of file diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py new file mode 100644 index 0000000000..832a8961db --- /dev/null +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py @@ -0,0 +1,223 @@ +import pandas as pd +import datetime +import logging + +##### TO_DO: see if the missing data check can still work or did we already fill it with zeros + +def write_to_log(logfilename): + ''' + Creates a logger object that outputs to a log file, to the filename specified, + and also streams to console. 
+ ''' + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + formatter = logging.Formatter(f'%(asctime)s:%(levelname)s: %(message)s', + datefmt='%y-%m-%d %H:%M:%S') + file_handler = logging.FileHandler(logfilename) + file_handler.setFormatter(formatter) + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(formatter) + + if not logger.hasHandlers(): + logger.addHandler(file_handler) + logger.addHandler(stream_handler) + + return logger + + +def check_rr20_ratios(df, variable, threshold, this_year, last_year, logger): + '''Validation checks where a ratio must be within a certain threshold limit + compared to the previous year.''' + agencies = df['organization'].unique() + output = [] + for agency in agencies: + agency_df = df[df['organization']==agency] + logger.info(f"Checking {agency} for {variable} info.") + if len(agency_df) > 0: + + # Check whether data for both years is present + if (len(agency_df[agency_df['fiscal_year']==this_year]) > 0) \ + & (len(agency_df[agency_df['fiscal_year']==last_year]) > 0): + + for mode in agency_df[(agency_df['fiscal_year']==this_year)]['mode'].unique(): + value_thisyr = (round(agency_df[(agency_df['mode']==mode) + & (agency_df['fiscal_year'] == this_year)] + [variable].unique()[0], 2)) + if len(agency_df[(agency_df['mode']==mode) & (agency_df['fiscal_year'] == last_year)][variable]) == 0: + value_lastyr = 0 + else: + value_lastyr = (round(agency_df[(agency_df['mode']==mode) + & (agency_df['fiscal_year'] == last_year)] + [variable].unique()[0], 2)) + + if (value_lastyr == 0) and (abs(value_thisyr - value_lastyr) >= threshold): + result = "fail" + check_name = f"{variable}" + mode = mode + description = (f"The {variable} for {mode} has changed from last year by > = {threshold*100}%, please provide a narrative justification.") + elif (value_lastyr != 0) and abs((value_lastyr - value_thisyr)/value_lastyr) >= threshold: + result = "fail" + check_name = f"{variable}" + mode = mode + description = 
(f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr)/value_lastyr)*100, 1)}%, please provide a narrative justification.") + else: + result = "pass" + check_name = f"{variable}" + mode = mode + description = "" + + output_line = {"Organization": agency, + "name_of_check" : check_name, + "mode": mode, + "value_checked": f"{this_year} = {value_thisyr}, {last_year} = {value_lastyr}", + "check_status": result, + "Description": description} + output.append(output_line) + else: + logger.info(f"There is no data for {agency}") + checks = pd.DataFrame(output).sort_values(by="Organization") + return checks + + +def check_single_number(df, variable, this_year, last_year, logger, threshold=None,): + '''Validation checks where a single number must be within a certain threshold limit + compared to the previous year.''' + agencies = df['organization'].unique() + output = [] + for agency in agencies: + + if len(df[df['organization']==agency]) > 0: + logger.info(f"Checking {agency} for {variable} info.") + # Check whether data for both years is present, if so perform prior yr comparison. 
+ if (len(df[(df['organization']==agency) & (df['fiscal_year']==this_year)]) > 0) \ + & (len(df[(df['organization']==agency) & (df['fiscal_year']==last_year)]) > 0): + + for mode in df[(df['organization'] == agency) & (df['fiscal_year']==this_year)]['mode'].unique(): + value_thisyr = (round(df[(df['organization'] == agency) + & (df['mode']==mode) + & (df['fiscal_year'] == this_year)] + [variable].unique()[0], 2)) + # If there's no data for last yr: + if len(df[(df['organization'] == agency) + & (df['mode']==mode) + & (df['fiscal_year'] == last_year)][variable]) == 0: + value_lastyr = 0 + else: + value_lastyr = (round(df[(df['organization'] == agency) + & (df['mode']==mode) + & (df['fiscal_year'] == last_year)] + [variable].unique()[0], 2)) + + if (round(value_thisyr)==0 and round(value_lastyr) != 0) | (round(value_thisyr)!=0 and round(value_lastyr) == 0): + result = "fail" + check_name = f"{variable}" + mode = mode + description = (f"The {variable} for {mode} has changed either from or to zero compared to last year. 
Please provide a narrative justification.") + # run only the above check on whether something changed from zero to non-zero, if no threshold is given + elif threshold==None: + result = "pass" + check_name = f"{variable}" + mode = mode + description = "" + pass + # also check for pct change, if a threshold parameter is passed into function + elif (value_lastyr == 0) and (abs(value_thisyr - value_lastyr) >= threshold): + result = "fail" + check_name = f"{variable}" + mode = mode + description = (f"The {variable} for {mode} was 0 last year and has changed by > = {threshold*100}%, please provide a narrative justification.") + elif (value_lastyr != 0) and abs((value_lastyr - value_thisyr)/value_lastyr) >= threshold: + result = "fail" + check_name = f"{variable}" + mode = mode + description = (f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr)/value_lastyr)*100, 1)}%; please provide a narrative justification.") + else: + result = "pass" + check_name = f"{variable}" + mode = mode + description = "" + + output_line = {"Organization": agency, + "name_of_check" : check_name, + "mode": mode, + "value_checked": f"{this_year} = {value_thisyr}, {last_year} = {value_lastyr}", + "check_status": result, + "Description": description} + output.append(output_line) + else: + logger.info(f"There is no data for {agency}") + checks = pd.DataFrame(output).sort_values(by="Organization") + return checks + + +def model(dbt, session): + # Set up the logger object + logger = write_to_log('rr20_ftc_servicechecks_log.log') + + this_year=datetime.datetime.now().year + last_year = this_year-1 + this_date=datetime.datetime.now().date().strftime('%Y-%m-%d') #for suffix on Excel files + + #Load data from BigQuery - pass in the dbt model that we draw from. 
+ allyears = dbt.ref("int_ntd_rr20_service_ratios") + allyears = allyears.toPandas() + + # Run validation checks + cph_checks = check_rr20_ratios(allyears, 'cost_per_hr', .30, this_year, last_year, logger) + mpv_checks = check_rr20_ratios(allyears, 'miles_per_veh', .20, this_year, last_year, logger) + vrm_checks = check_single_number(allyears, 'Annual_VRM', this_year, last_year, logger, threshold=.30) + frpt_checks = check_rr20_ratios(allyears, 'fare_rev_per_trip', .25, this_year, last_year, logger) + rev_speed_checks = check_rr20_ratios(allyears, 'rev_speed', .15, this_year, last_year, logger) + tph_checks = check_rr20_ratios(allyears, 'trips_per_hr', .30, this_year, last_year, logger) + voms0_check = check_single_number(allyears, 'VOMX', this_year, last_year, logger) + + # Combine checks into one table + rr20_checks = pd.concat([cph_checks, mpv_checks, vrm_checks, + frpt_checks, rev_speed_checks, + tph_checks, voms0_check], + ignore_index=True).sort_values(by="Organization") + + ## Part 1: save Excel file to GCS + GCS_FILE_PATH_VALIDATED = f"gs://calitp-ntd-report-validation/validation_reports_{this_year}" + with pd.ExcelWriter(f"{GCS_FILE_PATH_VALIDATED}/rr20_service_check_report_{this_date}.xlsx") as writer: + rr20_checks.to_excel(writer, sheet_name="rr20_checks_full", index=False, startrow=2) + + workbook = writer.book + worksheet = writer.sheets["rr20_checks_full"] + cell_highlight = workbook.add_format({ + 'fg_color': 'yellow', + 'bold': True, + 'border': 1 + }) + report_title = "NTD Data Validation Report" + title_format = workbook.add_format({ + 'bold': True, + 'valign': 'vcenter', + 'align': 'left', + 'font_color': '#1c639e', + 'font_size': 15 + }) + subtitle = "Reduced Reporting RR-20: Validation Warnings" + subtitle_format = workbook.add_format({ + 'bold': True, + 'align': 'left', + 'font_color': 'black', + 'font_size': 19 + }) + + worksheet.write('A1', report_title, title_format) + worksheet.merge_range('A2:C2', subtitle, subtitle_format) + 
worksheet.write('G3', 'Agency Response', cell_highlight) + worksheet.write('H3', 'Response Date', cell_highlight) + worksheet.set_column(0, 0, 35) #col A width + worksheet.set_column(1, 3, 22) #cols B-D width + worksheet.set_column(4, 4, 11) #col D width + worksheet.set_column(5, 6, 53) #col E-G width + worksheet.freeze_panes('B4') + + logger.info(f"RR-20 service data checks conducted on {this_date} is complete!") + + ## Part 2: send table to BigQuery + return rr20_checks + + diff --git a/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml b/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml new file mode 100644 index 0000000000..345982b566 --- /dev/null +++ b/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml @@ -0,0 +1,21 @@ +version: 2 + +sources: + - name: ntd_report_validation + description: | + Data from BlackCat API. + database: "{{ env_var('DBT_SOURCE_DATABASE', var('SOURCE_DATABASE')) }}" + schema: external_blackcat + tables: + - name: all_2023_ntdreports + - name: a10_ntdreportingstationsandmaintenance + - name: a15_ntdtransitassetmanagement + - name: a30_ntdassetandresourceinfo + - name: ntd_tamnarrative + - name: p10_ntdreporterbasicinfo + - name: p20_ntdreportermodes + - name: p50_ntdreportergtfs + - name: rr20_intercity + - name: rr20_rural + - name: rr20_urban_tribal + - name: ss60_safety diff --git a/warehouse/models/staging/ntd_validation/stg_2022_rr20_exp_by_mode.sql b/warehouse/models/staging/ntd_validation/stg_2022_rr20_exp_by_mode.sql new file mode 100644 index 0000000000..ea678124c7 --- /dev/null +++ b/warehouse/models/staging/ntd_validation/stg_2022_rr20_exp_by_mode.sql @@ -0,0 +1,5 @@ +--- One-time data ingest of 2022 data, whose pattern which will not be repeated in the future +--- We pull these tables in to use them in later int and fct models +SELECT + * +FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_expenses_by_mode` \ No newline at end of file diff --git 
a/warehouse/models/staging/ntd_validation/stg_2022_rr20_financial.sql b/warehouse/models/staging/ntd_validation/stg_2022_rr20_financial.sql new file mode 100644 index 0000000000..0409fbc451 --- /dev/null +++ b/warehouse/models/staging/ntd_validation/stg_2022_rr20_financial.sql @@ -0,0 +1,5 @@ +--- One-time data ingest of 2022 data, whose pattern which will not be repeated in the future +--- We pull these tables in to use them in later int and fct models +SELECT + * +FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_financials__2` \ No newline at end of file diff --git a/warehouse/models/staging/ntd_validation/stg_2022_rr20_service.sql b/warehouse/models/staging/ntd_validation/stg_2022_rr20_service.sql new file mode 100644 index 0000000000..919ce31487 --- /dev/null +++ b/warehouse/models/staging/ntd_validation/stg_2022_rr20_service.sql @@ -0,0 +1,5 @@ +--- One-time data ingest of 2022 data, whose pattern which will not be repeated in the future +--- We pull these tables in to use them in later int and fct models +SELECT + * +FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_service_data` \ No newline at end of file diff --git a/warehouse/models/staging/ntd_validation/stg_2023_a10.sql b/warehouse/models/staging/ntd_validation/stg_2023_a10.sql new file mode 100644 index 0000000000..85db82f56f --- /dev/null +++ b/warehouse/models/staging/ntd_validation/stg_2023_a10.sql @@ -0,0 +1,18 @@ +SELECT + organization, + reportstatus as api_report_status, + TIMESTAMP_MILLIS(reportlastmodifieddate) as api_report_last_modified_date, + reportperiod as api_report_period, + a10.id as id, + a10.ReportId as report_id, + a10.ServiceMode as service_mode, + a10.PTOwnedByServiceProvider as pt_owned_by_service_provider, + a10.PTOwnedByPublicAgency as pt_owned_by_public_agency, + a10.PTLeasedByPublicAgency as pt_leased_by_public_agency, + a10.PTLeasedByServiceProvider as pt_leased_by_service_provider, + a10.DOOwned as do_owned, + a10.DOLeasedByPublicAgency as do_leased_by_public_agency, + 
a10.DOLeasedFromPrivateEntity as do_leased_from_private_entity, + a10.LastModifiedDate as last_modified_date +FROM `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` +, UNNEST (`ntdreportingstationsandmaintenance_data`) as `a10` \ No newline at end of file diff --git a/warehouse/models/staging/ntd_validation/stg_2023_rr20_rural.sql b/warehouse/models/staging/ntd_validation/stg_2023_rr20_rural.sql new file mode 100644 index 0000000000..b753a23584 --- /dev/null +++ b/warehouse/models/staging/ntd_validation/stg_2023_rr20_rural.sql @@ -0,0 +1,23 @@ +SELECT + organization, + reportstatus as api_report_status, + TIMESTAMP_MILLIS(reportlastmodifieddate) as api_report_last_modified_date, + reportperiod as api_report_period, + ntdreportingrr20_rural_data.id as id, + ntdreportingrr20_rural_data.ReportId as report_id, + ntdreportingrr20_rural_data.Item as item, + ntdreportingrr20_rural_data.Revenue as revenue, + ntdreportingrr20_rural_data.Type as type, + ntdreportingrr20_rural_data.CSSClass as css_class, + ntdreportingrr20_rural_data.OperationsExpended as operations_expended, + ntdreportingrr20_rural_data.CapitalExpended as capital_expended, + ntdreportingrr20_rural_data.Description as description, + ntdreportingrr20_rural_data.AnnualVehicleRevMiles as annual_vehicle_rev_miles, + ntdreportingrr20_rural_data.AnnualVehicleRevHours as annual_vehicle_rev_hours, + ntdreportingrr20_rural_data.AnnualUnlinkedPassTrips as annual_unlinked_pass_trips, + ntdreportingrr20_rural_data.AnnualVehicleMaxService as annual_vehicle_max_service, + ntdreportingrr20_rural_data.SponsoredServiceUPT as sponsored_service_upt, + ntdreportingrr20_rural_data.Quantity as quantity, + ntdreportingrr20_rural_data.LastModifiedDate as last_modified_date +FROM `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` +, UNNEST (`ntdreportingrr20_rural_data`) as `ntdreportingrr20_rural_data` \ No newline at end of file diff --git 
a/warehouse/models/staging/ntd_validation/stg_2023_rr20_urban_tribal.sql b/warehouse/models/staging/ntd_validation/stg_2023_rr20_urban_tribal.sql new file mode 100644 index 0000000000..0b871e0ff8 --- /dev/null +++ b/warehouse/models/staging/ntd_validation/stg_2023_rr20_urban_tribal.sql @@ -0,0 +1,15 @@ +SELECT + organization, + reportstatus as api_report_status, + TIMESTAMP_MILLIS(reportlastmodifieddate) as api_report_last_modified_date, + reportperiod as api_report_period, + ntdreportingrr20_urban_tribal_data.id as id, + ntdreportingrr20_urban_tribal_data.ReportId as report_id, + ntdreportingrr20_urban_tribal_data.ItemId as item_id, + ntdreportingrr20_urban_tribal_data.Item as item, + ntdreportingrr20_urban_tribal_data.OperationsExpended as operations_expended, + ntdreportingrr20_urban_tribal_data.CapitalExpended as capital_expended, + ntdreportingrr20_urban_tribal_data.Description as description, + ntdreportingrr20_urban_tribal_data.LastModifiedDate as last_modified_date +FROM `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` +, UNNEST (`ntdreportingrr20_urban_tribal_data`) as `ntdreportingrr20_urban_tribal_data` \ No newline at end of file diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql b/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql new file mode 100644 index 0000000000..50c5208254 --- /dev/null +++ b/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql @@ -0,0 +1,3 @@ +SELECT + Organization as organization +FROM blackcat_raw.2023_organizations \ No newline at end of file From 9ef1361551d3e197d831044edda4974b58ebcfce Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Wed, 22 Nov 2023 14:19:02 -0800 Subject: [PATCH 03/15] simplify external tables dag --- .../external_table_a10.yml | 68 --------------- .../external_table_a15.yml | 16 ---- .../external_table_a30.yml | 86 ------------------- .../external_table_p10.yml | 74 ---------------- .../external_table_p20.yml | 56 ------------ 
.../external_table_p50.yml | 53 ------------ .../external_table_rr20_intercity.yml | 71 --------------- .../external_table_rr20_rural.yml | 77 ----------------- .../external_table_rr20_urban_tribal.yml | 59 ------------- .../external_table_ss60.yml | 16 ---- .../external_table_tam_narrative.yml | 16 ---- airflow/plugins/operators/blackcat_to_gcs.py | 58 ++----------- 12 files changed, 9 insertions(+), 641 deletions(-) delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_a10.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_a15.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_a30.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_p10.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_p20.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_p50.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_intercity.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_rural.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_urban_tribal.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_ss60.yml delete mode 100644 airflow/dags/create_external_tables/ntd_report_validation/external_table_tam_narrative.yml diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_a10.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_a10.yml deleted file mode 100644 index bf1d4cbabf..0000000000 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_a10.yml +++ /dev/null @@ -1,68 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation 
-prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.a10_ntdreportingstationsandmaintenance - LIMIT 1; -source_objects: - - "a10_NTDReportingStationsAndMaintenance/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.a10_ntdreportingstationsandmaintenance" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "a10_NTDReportingStationsAndMaintenance/" -schema_fields: - - name: api_report_id - type: STRING - mode: NULLABLE - - name: api_organization - type: STRING - mode: NULLABLE - - name: api_report_period - type: INTEGER - mode: NULLABLE - - name: api_report_status - type: STRING - mode: NULLABLE - - name: api_last_modified - type: TIMESTAMP - mode: NULLABLE - - name: id - type: STRING - mode: NULLABLE - - name: report_id - type: STRING - mode: NULLABLE - - name: service_mode - type: STRING - mode: NULLABLE - - name: pt_owned_by_service_provider - type: FLOAT64 - mode: NULLABLE - - name: pt_owned_by_public_agency - type: FLOAT64 - mode: NULLABLE - - name: pt_leased_by_public_agency - type: FLOAT64 - mode: NULLABLE - - name: pt_leased_by_service_provider - type: FLOAT64 - mode: NULLABLE - - name: do_owned - type: FLOAT64 - mode: NULLABLE - - name: do_leased_by_public_agency - type: FLOAT64 - mode: NULLABLE - - name: do_leased_from_private_entity - type: FLOAT64 - mode: NULLABLE - - name: last_modified_date - type: DATETIME - mode: NULLABLE - - - diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_a15.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_a15.yml deleted file mode 100644 index 1e327030eb..0000000000 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_a15.yml +++ /dev/null @@ -1,16 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ 
get_project_id() }}`.external_blackcat.a15_ntdtransitassetmanagement - LIMIT 1; -source_objects: - - "a15_NTDTransitAssetManagementA15/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.a15_ntdtransitassetmanagement" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "a15_NTDTransitAssetManagementA15/" diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_a30.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_a30.yml deleted file mode 100644 index beefa1982d..0000000000 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_a30.yml +++ /dev/null @@ -1,86 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.a30_ntdassetandresourceinfo - LIMIT 1; -source_objects: - - "a30_NTDAssetAndResourceInfo/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.a30_ntdassetandresourceinfo" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "a30_NTDAssetAndResourceInfo/" -schema_fields: - - name: api_report_id - type: STRING - mode: NULLABLE - - name: api_organization - type: STRING - mode: NULLABLE - - name: api_report_period - type: INTEGER - mode: NULLABLE - - name: api_report_status - type: STRING - mode: NULLABLE - - name: api_last_modified - type: TIMESTAMP - mode: NULLABLE - - name: id - type: STRING - mode: NULLABLE - - name: vehicle_id - type: STRING - mode: NULLABLE - - name: vehicle_status - type: STRING - mode: NULLABLE - - name: vin - type: STRING - mode: NULLABLE - - name: ntdid - type: STRING - mode: NULLABLE - - name: ada_access - type: STRING - mode: NULLABLE - - name: vehicle_type - type: STRING - mode: NULLABLE - - name: fuel_type - type: 
STRING - mode: NULLABLE - - name: average_estimated_service_years_when_new - type: INTEGER - mode: NULLABLE - - name: average_expiration_years_when_new - type: INTEGER - mode: NULLABLE - - name: vehicle_year - type: INTEGER - mode: NULLABLE - - name: useful_life_years_remaining - type: INTEGER - mode: NULLABLE - - name: vehicle_length - type: STRING - mode: NULLABLE - - name: seating_capacity - type: STRING - mode: NULLABLE - - name: ownership_type - type: STRING - mode: NULLABLE - - name: modes_operated_display_text - type: STRING - mode: NULLABLE - - name: modes_operated_full_text - type: STRING - mode: NULLABLE - - name: last_modified_date - type: DATETIME - mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_p10.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_p10.yml deleted file mode 100644 index fdb6a5aa45..0000000000 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_p10.yml +++ /dev/null @@ -1,74 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.p10_ntdreporterbasicinfo - LIMIT 1; -source_objects: - - "p10_NTDReportingP10/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.p10_ntdreporterbasicinfo" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "p10_NTDReportingP10/" -schema_fields: - - name: api_report_id - type: STRING - mode: NULLABLE - - name: api_organization - type: STRING - mode: NULLABLE - - name: api_report_period - type: INTEGER - mode: NULLABLE - - name: api_report_status - type: STRING - mode: NULLABLE - - name: api_last_modified - type: TIMESTAMP - mode: NULLABLE - - name: id - type: STRING - mode: NULLABLE - - name: report_id - type: STRING - mode: NULLABLE - - name: org_id - type: STRING - mode: 
NULLABLE - - name: user_id - type: STRING - mode: NULLABLE - - name: first_name - type: STRING - mode: NULLABLE - - name: last_name - type: STRING - mode: NULLABLE - - name: full_name - type: STRING - mode: NULLABLE - - name: text - type: STRING - mode: NULLABLE - - name: value - type: STRING - mode: NULLABLE - - name: group - type: STRING - mode: NULLABLE - - name: bool_value - type: BOOL - mode: NULLABLE - - name: primary_phone - type: STRING - mode: NULLABLE - - name: email - type: STRING - mode: NULLABLE - - name: last_modified_date - type: DATETIME - mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_p20.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_p20.yml deleted file mode 100644 index 6fd9370b8a..0000000000 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_p20.yml +++ /dev/null @@ -1,56 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.p20_ntdreportermodes - LIMIT 1; -source_objects: - - "p20_NTDReportingP20/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.p20_ntdreportermodes" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "p20_NTDReportingP20/" -schema_fields: - - name: api_report_id - type: STRING - mode: NULLABLE - - name: api_organization - type: STRING - mode: NULLABLE - - name: api_report_period - type: INTEGER - mode: NULLABLE - - name: api_report_status - type: STRING - mode: NULLABLE - - name: api_last_modified - type: TIMESTAMP - mode: NULLABLE - - name: id - type: STRING - mode: NULLABLE - - name: report_id - type: STRING - mode: NULLABLE - - name: service_mode - type: STRING - mode: NULLABLE - - name: type_of_service - type: STRING - mode: NULLABLE - - name: commitment_date - type: DATETIME - 
mode: NULLABLE - - name: start_date - type: DATETIME - mode: NULLABLE - - name: end_date - type: DATETIME - mode: NULLABLE - - name: last_modified_date - type: DATETIME - mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_p50.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_p50.yml deleted file mode 100644 index 78c6cf4dde..0000000000 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_p50.yml +++ /dev/null @@ -1,53 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.p50_ntdreportergtfs - LIMIT 1; -source_objects: - - "p50_NTDReportingP50/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.p50_ntdreportergtfs" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "p50_NTDReportingP50/" -schema_fields: - - name: api_report_id - type: STRING - mode: NULLABLE - - name: api_organization - type: STRING - mode: NULLABLE - - name: api_report_period - type: INTEGER - mode: NULLABLE - - name: api_report_status - type: STRING - mode: NULLABLE - - name: api_last_modified - type: TIMESTAMP - mode: NULLABLE - - name: id - type: STRING - mode: NULLABLE - - name: report_id - type: STRING - mode: NULLABLE - - name: mode - type: STRING - mode: NULLABLE - - name: type - type: STRING - mode: NULLABLE - - name: web_link - type: STRING - mode: NULLABLE - - name: file_path - type: STRING - mode: NULLABLE - - name: last_modified_date - type: DATETIME - mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_intercity.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_intercity.yml deleted file mode 100644 index e52cbeabba..0000000000 --- 
a/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_intercity.yml +++ /dev/null @@ -1,71 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.rr20_intercity - LIMIT 1; -source_objects: - - "rr20_NTDReportingRR20_Intercity/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.rr20_intercity" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "rr20_NTDReportingRR20_Intercity/" -schema_fields: - - name: api_report_id - type: STRING - mode: NULLABLE - - name: api_organization - type: STRING - mode: NULLABLE - - name: api_report_period - type: INTEGER - mode: NULLABLE - - name: api_report_status - type: STRING - mode: NULLABLE - - name: api_last_modified - type: TIMESTAMP - mode: NULLABLE - - name: id - type: STRING - mode: NULLABLE - - name: item_id - type: STRING - mode: NULLABLE - - name: report_id - type: STRING - mode: NULLABLE - - name: item - type: STRING - mode: NULLABLE - - name: type - type: STRING - mode: NULLABLE - - name: operations_expended - type: STRING - mode: NULLABLE - - name: capital_expended - type: STRING - mode: NULLABLE - - name: description - type: STRING - mode: NULLABLE - - name: annual_vehicle_rev_miles - type: FLOAT64 - mode: NULLABLE - - name: regular_unlinked_passenger_trips - type: INTEGER - mode: NULLABLE - - name: last_modified_date - type: DATETIME - mode: NULLABLE - - - - - - diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_rural.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_rural.yml deleted file mode 100644 index bf85034c82..0000000000 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_rural.yml +++ /dev/null @@ -1,77 +0,0 @@ -operator: operators.ExternalTable 
-bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.rr20_rural - LIMIT 1; -source_objects: - - "rr20_NTDReportingRR20_Rural/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.rr20_rural" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "rr20_NTDReportingRR20_Rural/" -schema_fields: - - name: api_report_id - type: STRING - mode: NULLABLE - - name: api_organization - type: STRING - mode: NULLABLE - - name: api_report_period - type: INTEGER - mode: NULLABLE - - name: api_report_status - type: STRING - mode: NULLABLE - - name: api_last_modified - type: TIMESTAMP - mode: NULLABLE - - name: id - type: STRING - mode: NULLABLE - - name: report_id - type: STRING - mode: NULLABLE - - name: item - type: STRING - mode: NULLABLE - - name: REVENUE - type: FLOAT64 - mode: NULLABLE - - name: css_class - type: STRING - mode: NULLABLE - - name: operations_expended - type: STRING - mode: NULLABLE - - name: capital_expended - type: STRING - mode: NULLABLE - - name: description - type: STRING - mode: NULLABLE - - name: annual_vehicle_rev_miles - type: FLOAT64 - mode: NULLABLE - - name: annual_vehicle_rev_hours - type: INTEGER - mode: NULLABLE - - name: annual_unlinked_pass_trips - type: FLOAT64 - mode: NULLABLE - - name: annual_vehicle_max_service - type: INTEGER - mode: NULLABLE - - name: sponsored_service_upt - type: INTEGER - mode: NULLABLE - - name: quantity - type: INTEGER - mode: NULLABLE - - name: last_modified_date - type: DATETIME - mode: NULLABLE \ No newline at end of file diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_urban_tribal.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_urban_tribal.yml deleted file mode 100644 index 1522c542ab..0000000000 --- 
a/airflow/dags/create_external_tables/ntd_report_validation/external_table_rr20_urban_tribal.yml +++ /dev/null @@ -1,59 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.rr20_urban_tribal - LIMIT 1; -source_objects: - - "rr20_NTDReportingRR20_Urban_Tribal/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.rr20_urban_tribal" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "rr20_NTDReportingRR20_Urban_Tribal/" -schema_fields: - - name: api_report_id - type: STRING - mode: NULLABLE - - name: api_organization - type: STRING - mode: NULLABLE - - name: api_report_period - type: INTEGER - mode: NULLABLE - - name: api_report_status - type: STRING - mode: NULLABLE - - name: api_last_modified - type: TIMESTAMP - mode: NULLABLE - - name: id - type: STRING - mode: NULLABLE - - name: item_id - type: STRING - mode: NULLABLE - - name: report_id - type: STRING - mode: NULLABLE - - name: item - type: STRING - mode: NULLABLE - - name: type - type: STRING - mode: NULLABLE - - name: operations_expended - type: STRING - mode: NULLABLE - - name: capital_expended - type: STRING - mode: NULLABLE - - name: description - type: STRING - mode: NULLABLE - - name: last_modified_date - type: DATETIME - mode: NULLABLE diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_ss60.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_ss60.yml deleted file mode 100644 index 576a286c71..0000000000 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_ss60.yml +++ /dev/null @@ -1,16 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.ss60_safety - LIMIT 1; 
-source_objects: - - "ss60_SS60/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.ss60_safety" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "ss60_SS60/" diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_tam_narrative.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_tam_narrative.yml deleted file mode 100644 index 27e869a3f6..0000000000 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_tam_narrative.yml +++ /dev/null @@ -1,16 +0,0 @@ -operator: operators.ExternalTable -bucket: gs://calitp-ntd-report-validation -prefix_bucket: true -post_hook: | - SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.ntd_tamnarrative - LIMIT 1; -source_objects: - - "ntd_NTDReportingTAMNarrative/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.ntd_tamnarrative" -source_format: NEWLINE_DELIMITED_JSON -use_bq_client: true -hive_options: - mode: AUTO - require_partition_filter: false - source_uri_prefix: "ntd_NTDReportingTAMNarrative/" diff --git a/airflow/plugins/operators/blackcat_to_gcs.py b/airflow/plugins/operators/blackcat_to_gcs.py index b6927aceea..96a68f865c 100644 --- a/airflow/plugins/operators/blackcat_to_gcs.py +++ b/airflow/plugins/operators/blackcat_to_gcs.py @@ -31,14 +31,6 @@ def write_to_log(logfilename): return logger -def camel_to_snake(name): - '''Converts Snake case to underscore separation for renaming columns; - VehicleStatus becomes vehicle_status and - can handle acroynms like ADAAccess, which becomes ada_access''' - name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) - return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() - - class BlackCatApiExtract(BaseModel): api_url: str form: str @@ -78,43 +70,10 @@ def fetch_from_bc_api(self): f"Downloading BlackCat data for {self.extract_time.format('YYYY')}_{self.bq_table_name}." 
) response = requests.get(self.api_url, verify=False) - blob = response.json() - - org_data = [] - - # Cyling through and processing each org's data - for x in blob: - report_id = x.get('ReportId') - org = x.get('Organization') - period = x.get('ReportPeriod') - status = x.get('ReportStatus') - last_mod = (pendulum.from_format(x.get('ReportLastModifiedDate'), 'MM/DD/YYYY HH:mm:ss A') - .in_tz('America/Los_Angeles') - .set(tz='UTC')) - iso = last_mod.to_iso8601_string() - - org_info_values = {'api_report_id': report_id, 'api_organization': org, - 'api_report_period': period, 'api_report_status': status, - 'api_last_modified': iso} - org_info_df = pd.DataFrame([org_info_values]) - - table_json = x[self.api_tablename]['Data'] - # checks for nested json entries, replaces any with only the 'Text' value from nested json. - for x in table_json: - for k,v in x.items(): - if type(v) is dict: - x[k] = x[k]['Text'] - raw_df = pd.DataFrame.from_dict(table_json) - raw_df.rename(columns=lambda c: camel_to_snake(c), inplace=True) - whole_df = pd.concat([org_info_df, raw_df], axis=1).sort_values(by='api_organization') - - # Only the 1st row of data in org_info_df is filled, other rows have NAs. 
- # Here we fill in the rest with the values - whole_df = whole_df.fillna(value=org_info_values) - org_data.append(whole_df) - - raw_df = pd.concat(org_data) - raw_df.rename(columns=lambda c: camel_to_snake(c), inplace=True) + blob = response.json() + + raw_df = pd.json_normalize(blob) + raw_df['ReportLastModifiedDate'] = raw_df['ReportLastModifiedDate'].astype('datetime64[ns]') self.data = raw_df.rename(make_name_bq_safe, axis="columns") self.logger.info( @@ -151,7 +110,7 @@ def save_to_gcs(self, fs, bucket): return hive_path -class BlackCatApiToGCSOperator(BaseOperator): +class BlackCatApiToGCSOperator2(BaseOperator): template_fields = ("bucket",) def __init__( @@ -163,9 +122,10 @@ def __init__( bq_table_name, **kwargs, ): - """An operator that downloads data from a BlackCat API - and saves it as a JSON file hive-partitioned by date in Google Cloud - Storage (GCS). + """An operator that downloads all data from a BlackCat API + and saves it as one JSON file hive-partitioned by date in Google Cloud + Storage (GCS). Each org's data will be in 1 row, and for each separate table in the API, + a nested column will hold all of it's data. Args: bucket (str): GCS bucket where the scraped BlackCat report will be saved. 
From 3da73864d8f93a3b53b0ec5dbb0bee52b231d0e5 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Tue, 28 Nov 2023 10:38:40 -0800 Subject: [PATCH 04/15] start dag for excel publishing, dbt cleaning for rr20 service --- .../external_table_all_ntdreports.yml | 2 +- .../METADATA.yml | 0 .../README.md | 2 +- .../all_2023_submitted_for_ntd.yml | 2 +- .../METADATA.yml | 18 +++++ .../ntd_report_publish_validation/README.md | 7 ++ .../publish_validation_report.py | 75 +++++++++++++++++++ airflow/plugins/operators/blackcat_to_gcs.py | 2 +- .../int_ntd_rr20_service_alldata.sql | 6 +- .../ntd_validation/_src_api_externaltable.yml | 14 +--- ....sql => stg_ntd_2022_rr20_exp_by_mode.sql} | 0 ...al.sql => stg_ntd_2022_rr20_financial.sql} | 0 ...vice.sql => stg_ntd_2022_rr20_service.sql} | 0 ...{stg_2023_a10.sql => stg_ntd_2023_a10.sql} | 2 +- ..._rural.sql => stg_ntd_2023_rr20_rural.sql} | 2 +- ...sql => stg_ntd_2023_rr20_urban_tribal.sql} | 3 +- 16 files changed, 113 insertions(+), 22 deletions(-) rename airflow/dags/{ntd_report_validation => ntd_report_from_blackcat}/METADATA.yml (100%) rename airflow/dags/{ntd_report_validation => ntd_report_from_blackcat}/README.md (96%) rename airflow/dags/{ntd_report_validation => ntd_report_from_blackcat}/all_2023_submitted_for_ntd.yml (81%) create mode 100644 airflow/dags/ntd_report_publish_validation/METADATA.yml create mode 100644 airflow/dags/ntd_report_publish_validation/README.md create mode 100644 airflow/dags/ntd_report_publish_validation/publish_validation_report.py rename warehouse/models/staging/ntd_validation/{stg_2022_rr20_exp_by_mode.sql => stg_ntd_2022_rr20_exp_by_mode.sql} (100%) rename warehouse/models/staging/ntd_validation/{stg_2022_rr20_financial.sql => stg_ntd_2022_rr20_financial.sql} (100%) rename warehouse/models/staging/ntd_validation/{stg_2022_rr20_service.sql => stg_ntd_2022_rr20_service.sql} (100%) rename warehouse/models/staging/ntd_validation/{stg_2023_a10.sql => stg_ntd_2023_a10.sql} (91%) rename 
warehouse/models/staging/ntd_validation/{stg_2023_rr20_rural.sql => stg_ntd_2023_rr20_rural.sql} (94%) rename warehouse/models/staging/ntd_validation/{stg_2023_rr20_urban_tribal.sql => stg_ntd_2023_rr20_urban_tribal.sql} (85%) diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml index aa98e6e59a..ad5fe5517c 100644 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml @@ -1,6 +1,6 @@ operator: operators.ExternalTable bucket: gs://calitp-ntd-report-validation -prefix_bucket: true +prefix_bucket: false post_hook: | SELECT * FROM `{{ get_project_id() }}`.external_blackcat.all_2023_ntdreports diff --git a/airflow/dags/ntd_report_validation/METADATA.yml b/airflow/dags/ntd_report_from_blackcat/METADATA.yml similarity index 100% rename from airflow/dags/ntd_report_validation/METADATA.yml rename to airflow/dags/ntd_report_from_blackcat/METADATA.yml diff --git a/airflow/dags/ntd_report_validation/README.md b/airflow/dags/ntd_report_from_blackcat/README.md similarity index 96% rename from airflow/dags/ntd_report_validation/README.md rename to airflow/dags/ntd_report_from_blackcat/README.md index 652eb0e2a6..4189a8a187 100644 --- a/airflow/dags/ntd_report_validation/README.md +++ b/airflow/dags/ntd_report_from_blackcat/README.md @@ -1,4 +1,4 @@ -# `ntd_report_validation` +# `ntd_report_from_blackcat` Type: [Now|Scheduled](https://docs.calitp.org/data-infra/airflow/dags-maintenance.html) diff --git a/airflow/dags/ntd_report_validation/all_2023_submitted_for_ntd.yml b/airflow/dags/ntd_report_from_blackcat/all_2023_submitted_for_ntd.yml similarity index 81% rename from airflow/dags/ntd_report_validation/all_2023_submitted_for_ntd.yml rename to 
airflow/dags/ntd_report_from_blackcat/all_2023_submitted_for_ntd.yml index c01b9a0515..ce0acd3528 100644 --- a/airflow/dags/ntd_report_validation/all_2023_submitted_for_ntd.yml +++ b/airflow/dags/ntd_report_from_blackcat/all_2023_submitted_for_ntd.yml @@ -1,6 +1,6 @@ operator: operators.BlackCatApiToGCSOperator -bucket: "gs://test-calitp-ntd-report-validation" +bucket: "gs://calitp-ntd-report-validation" api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" api_tablename: "2023_NTDReporting" form: "all" diff --git a/airflow/dags/ntd_report_publish_validation/METADATA.yml b/airflow/dags/ntd_report_publish_validation/METADATA.yml new file mode 100644 index 0000000000..0f97d22bff --- /dev/null +++ b/airflow/dags/ntd_report_publish_validation/METADATA.yml @@ -0,0 +1,18 @@ +description: "Process BigQuery tables into Excel files, save in GCS" +schedule_interval: "0 20 * * 1" #8 pm every Monday +tags: + - ntd, blackcat +default_args: + owner: airflow + depends_on_past: False + start_date: "2023-10-02" + catchup: False + email: + - "kim.engie@slalom.com" + - "christian.suyat@dot.ca.gov" + - "katrina.kaiser@dot.ca.gov" + email_on_failure: True + pool: default_pool + concurrency: 50 +wait_for_defaults: + timeout: 3600 \ No newline at end of file diff --git a/airflow/dags/ntd_report_publish_validation/README.md b/airflow/dags/ntd_report_publish_validation/README.md new file mode 100644 index 0000000000..28977df52d --- /dev/null +++ b/airflow/dags/ntd_report_publish_validation/README.md @@ -0,0 +1,7 @@ +# `ntd_report_publish_validation` + +Type: [Now|Scheduled](https://docs.calitp.org/data-infra/airflow/dags-maintenance.html) + +This DAG orchestrates the publishing of NTD Report validation checks in the form of Excel files, that it saves into Google Cloud Storage. Checks conducted on submitted NTD report submissions, previously stored into BigQuery with dbt models. 
They are then converted to Excel files and saves in the Google Cloud Storage bucket `calitp-ntd-report-validation`. + +In the event of failure, the job can be rerun without backfilling. \ No newline at end of file diff --git a/airflow/dags/ntd_report_publish_validation/publish_validation_report.py b/airflow/dags/ntd_report_publish_validation/publish_validation_report.py new file mode 100644 index 0000000000..7b5b4e9e82 --- /dev/null +++ b/airflow/dags/ntd_report_publish_validation/publish_validation_report.py @@ -0,0 +1,75 @@ +# --- +# python_callable: publish_report +# provide_context: true +# --- +from google.cloud import bigquery +import pandas as pd +import datetime +import re + +import google.auth +import google.auth.transport.requests + +import pendulum +from calitp_data_infra.storage import ( + fetch_all_in_partition, + get_fs, +) + + +def publish_report(): + client = bigquery.Client() + print("Got BG client!") + project = "cal-itp-data-infra-staging" + dataset_id = "staging_staging" + + dataset_ref = bigquery.DatasetReference(project, dataset_id) + table_ref = dataset_ref.table("fct_ntd_rr20_service_checks") + table = client.get_table(table_ref) + print("Got table!") + + df = client.list_rows(table).to_dataframe() + print("Got df from BQ!") + print(df.head()) + + + # this_year=datetime.datetime.now().year + # ## Part 1: save Excel file to GCS (for emailing to subrecipients) + # GCS_FILE_PATH_VALIDATED = f"gs://calitp-ntd-report-validation/validation_reports_{this_year}" + # with pd.ExcelWriter(f"{GCS_FILE_PATH_VALIDATED}/rr20_service_check_report_{this_date}.xlsx") as writer: + # rr20_checks.to_excel(writer, sheet_name="rr20_checks_full", index=False, startrow=2) + + # workbook = writer.book + # worksheet = writer.sheets["rr20_checks_full"] + # cell_highlight = workbook.add_format({ + # 'fg_color': 'yellow', + # 'bold': True, + # 'border': 1 + # }) + # report_title = "NTD Data Validation Report" + # title_format = workbook.add_format({ + # 'bold': True, + # 
'valign': 'center', + # 'align': 'left', + # 'font_color': '#1c639e', + # 'font_size': 15 + # }) + # subtitle = "Reduced Reporting RR-20: Validation Warnings" + # subtitle_format = workbook.add_format({ + # 'bold': True, + # 'align': 'left', + # 'font_color': 'black', + # 'font_size': 19 + # }) + + # worksheet.write('A1', report_title, title_format) + # worksheet.merge_range('A2:C2', subtitle, subtitle_format) + # worksheet.write('G3', 'Agency Response', cell_highlight) + # worksheet.write('H3', 'Response Date', cell_highlight) + # worksheet.set_column(0, 0, 35) #col A width + # worksheet.set_column(1, 3, 22) #cols B-D width + # worksheet.set_column(4, 4, 11) #col D width + # worksheet.set_column(5, 6, 53) #col E-G width + # worksheet.freeze_panes('B4') + + diff --git a/airflow/plugins/operators/blackcat_to_gcs.py b/airflow/plugins/operators/blackcat_to_gcs.py index 96a68f865c..bf9d85d8e2 100644 --- a/airflow/plugins/operators/blackcat_to_gcs.py +++ b/airflow/plugins/operators/blackcat_to_gcs.py @@ -110,7 +110,7 @@ def save_to_gcs(self, fs, bucket): return hive_path -class BlackCatApiToGCSOperator2(BaseOperator): +class BlackCatApiToGCSOperator(BaseOperator): template_fields = ("bucket",) def __init__( diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql index ff65dcd991..8c21daf559 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql @@ -24,7 +24,7 @@ with data_2023 as ( annual_unlinked_pass_trips as Annual_UPT, sponsored_service_upt as Sponsored_UPT, annual_vehicle_max_service as VOMX - from {{ ref('stg_2023_rr20_rural') }} + from {{ ref('stg_ntd_2023_rr20_rural') }} WHERE type = "Expenses by Mode" ), @@ -38,7 +38,7 @@ service2022 as ( Annual_UPT, Sponsored_UPT, VOMX - from {{ ref('stg_2022_rr20_service') }} + from {{ 
ref('stg_ntd_2022_rr20_service') }} ), expenses2022 as ( @@ -48,7 +48,7 @@ expenses2022 as ( Operating_Capital as operating_capital, Mode as mode, Total_Annual_Expenses_By_Mode - FROM {{ ref('stg_2022_rr20_exp_by_mode') }} + FROM {{ ref('stg_ntd_2022_rr20_exp_by_mode') }} ), all_2022 as ( diff --git a/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml b/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml index 345982b566..1dc10d2e19 100644 --- a/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml +++ b/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml @@ -3,19 +3,9 @@ version: 2 sources: - name: ntd_report_validation description: | - Data from BlackCat API. + Data from BlackCat API. Each org's data is in 1 row, and for each separate table in the API, + a nested column holds all of its data. database: "{{ env_var('DBT_SOURCE_DATABASE', var('SOURCE_DATABASE')) }}" schema: external_blackcat tables: - name: all_2023_ntdreports - - name: a10_ntdreportingstationsandmaintenance - - name: a15_ntdtransitassetmanagement - - name: a30_ntdassetandresourceinfo - - name: ntd_tamnarrative - - name: p10_ntdreporterbasicinfo - - name: p20_ntdreportermodes - - name: p50_ntdreportergtfs - - name: rr20_intercity - - name: rr20_rural - - name: rr20_urban_tribal - - name: ss60_safety diff --git a/warehouse/models/staging/ntd_validation/stg_2022_rr20_exp_by_mode.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql similarity index 100% rename from warehouse/models/staging/ntd_validation/stg_2022_rr20_exp_by_mode.sql rename to warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql diff --git a/warehouse/models/staging/ntd_validation/stg_2022_rr20_financial.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql similarity index 100% rename from warehouse/models/staging/ntd_validation/stg_2022_rr20_financial.sql rename to
warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql diff --git a/warehouse/models/staging/ntd_validation/stg_2022_rr20_service.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql similarity index 100% rename from warehouse/models/staging/ntd_validation/stg_2022_rr20_service.sql rename to warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql diff --git a/warehouse/models/staging/ntd_validation/stg_2023_a10.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql similarity index 91% rename from warehouse/models/staging/ntd_validation/stg_2023_a10.sql rename to warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql index 85db82f56f..2cda0460f6 100644 --- a/warehouse/models/staging/ntd_validation/stg_2023_a10.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql @@ -14,5 +14,5 @@ SELECT a10.DOLeasedByPublicAgency as do_leased_by_public_agency, a10.DOLeasedFromPrivateEntity as do_leased_from_private_entity, a10.LastModifiedDate as last_modified_date -FROM `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` +FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} , UNNEST (`ntdreportingstationsandmaintenance_data`) as `a10` \ No newline at end of file diff --git a/warehouse/models/staging/ntd_validation/stg_2023_rr20_rural.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql similarity index 94% rename from warehouse/models/staging/ntd_validation/stg_2023_rr20_rural.sql rename to warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql index b753a23584..f68ef04afb 100644 --- a/warehouse/models/staging/ntd_validation/stg_2023_rr20_rural.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql @@ -19,5 +19,5 @@ SELECT ntdreportingrr20_rural_data.SponsoredServiceUPT as sponsored_service_upt, ntdreportingrr20_rural_data.Quantity as quantity, ntdreportingrr20_rural_data.LastModifiedDate as 
last_modified_date -FROM `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` +FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} , UNNEST (`ntdreportingrr20_rural_data`) as `ntdreportingrr20_rural_data` \ No newline at end of file diff --git a/warehouse/models/staging/ntd_validation/stg_2023_rr20_urban_tribal.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql similarity index 85% rename from warehouse/models/staging/ntd_validation/stg_2023_rr20_urban_tribal.sql rename to warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql index 0b871e0ff8..7b21fa5f34 100644 --- a/warehouse/models/staging/ntd_validation/stg_2023_rr20_urban_tribal.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql @@ -11,5 +11,6 @@ SELECT ntdreportingrr20_urban_tribal_data.CapitalExpended as capital_expended, ntdreportingrr20_urban_tribal_data.Description as description, ntdreportingrr20_urban_tribal_data.LastModifiedDate as last_modified_date -FROM `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` +FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} +-- `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` , UNNEST (`ntdreportingrr20_urban_tribal_data`) as `ntdreportingrr20_urban_tribal_data` \ No newline at end of file From ebe996e07bd352a3a2af135986351db6e428f459 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Tue, 28 Nov 2023 11:11:35 -0800 Subject: [PATCH 05/15] remove excel conversion from dbt model --- .../fct_ntd_rr20_service_checks.py | 38 ------------------- 1 file changed, 38 deletions(-) diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py index 832a8961db..750fffb6f9 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py @@ -176,44 
+176,6 @@ def model(dbt, session): frpt_checks, rev_speed_checks, tph_checks, voms0_check], ignore_index=True).sort_values(by="Organization") - - ## Part 1: save Excel file to GCS - GCS_FILE_PATH_VALIDATED = f"gs://calitp-ntd-report-validation/validation_reports_{this_year}" - with pd.ExcelWriter(f"{GCS_FILE_PATH_VALIDATED}/rr20_service_check_report_{this_date}.xlsx") as writer: - rr20_checks.to_excel(writer, sheet_name="rr20_checks_full", index=False, startrow=2) - - workbook = writer.book - worksheet = writer.sheets["rr20_checks_full"] - cell_highlight = workbook.add_format({ - 'fg_color': 'yellow', - 'bold': True, - 'border': 1 - }) - report_title = "NTD Data Validation Report" - title_format = workbook.add_format({ - 'bold': True, - 'valign': 'center', - 'align': 'left', - 'font_color': '#1c639e', - 'font_size': 15 - }) - subtitle = "Reduced Reporting RR-20: Validation Warnings" - subtitle_format = workbook.add_format({ - 'bold': True, - 'align': 'left', - 'font_color': 'black', - 'font_size': 19 - }) - - worksheet.write('A1', report_title, title_format) - worksheet.merge_range('A2:C2', subtitle, subtitle_format) - worksheet.write('G3', 'Agency Response', cell_highlight) - worksheet.write('H3', 'Response Date', cell_highlight) - worksheet.set_column(0, 0, 35) #col A width - worksheet.set_column(1, 3, 22) #cols B-D width - worksheet.set_column(4, 4, 11) #col D width - worksheet.set_column(5, 6, 53) #col E-G width - worksheet.freeze_panes('B4') logger.info(f"RR-20 service data checks conducted on {this_date} is complete!") From 9d1a834a73e8a4fdf95b73e0368b8f7d6af526f3 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Thu, 30 Nov 2023 16:04:47 -0800 Subject: [PATCH 06/15] adding rr20 financial checks --- .../int_ntd_rr20_financial_fare_revenues.sql | 55 ++++++++ .../int_ntd_rr20_financial_specific_funds.sql | 50 +++++++ .../int_ntd_rr20_financial_total_exp.sql | 54 ++++++++ .../fct_ntd_rr20_equal_totals_check.sql | 39 ++++++ .../fct_ntd_rr20_funds_checks.sql | 130 
++++++++++++++++++ 5 files changed, 328 insertions(+) create mode 100644 warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql create mode 100644 warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_specific_funds.sql create mode 100644 warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_total_exp.sql create mode 100644 warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql create mode 100644 warehouse/models/mart/ntd_validation/fct_ntd_rr20_funds_checks.sql diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql new file mode 100644 index 0000000000..810c14a8dc --- /dev/null +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql @@ -0,0 +1,55 @@ +-- need fare rev and upt for each year. didn't write check correctly the first time + +WITH fare_rev_2023 as ( + select + organization, + api_report_period as fiscal_year, + item as mode, + operations_expended + capital_expended as Fare_Revenues, + from {{ ref('stg_ntd_2023_rr20_rural') }} + WHERE type = "Fare Revenues" +), +upt_2023 as ( + select + organization, + api_report_period as fiscal_year, + item as mode, + annual_unlinked_pass_trips as Annual_UPT + from {{ ref('stg_ntd_2023_rr20_rural') }} + WHERE type = "Service Data" +), +all_2023 as ( + select fare_rev_2023.*, upt_2023.Annual_UPT + FROM fare_rev_2023 + FULL OUTER JOIN upt_2023 + ON fare_rev_2023.organization = upt_2023.organization + AND fare_rev_2023.mode = upt_2023.mode +), +fare_rev_2022 as ( + SELECT Organization_Legal_Name as organization, + Fiscal_Year as fiscal_year, + sum(Fare_Revenues) as Fare_Revenues + FROM {{ ref('stg_ntd_2022_rr20_financial') }} + GROUP BY organization, fiscal_year +), +upt_2022 as ( + select + Organization_Legal_Name as organization, + Fiscal_Year as fiscal_year, + Mode as mode, + 
Annual_UPT +from {{ ref('stg_ntd_2022_rr20_service') }} +), +all_2022 as ( + select fare_rev_2022.organization, fare_rev_2022.fiscal_year, + upt_2022.Mode, fare_rev_2022.Fare_Revenues, upt_2022.Annual_UPT + FROM fare_rev_2022 + FULL OUTER JOIN upt_2022 + ON fare_rev_2022.organization = upt_2022.organization +) + +SELECT * from all_2023 + +UNION ALL + +SELECT * from all_2022 diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_specific_funds.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_specific_funds.sql new file mode 100644 index 0000000000..c3af6aa0c9 --- /dev/null +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_specific_funds.sql @@ -0,0 +1,50 @@ +------- +-- NTD validation errors about these specific funding sources. +--- ID #s RR20F-070, RR20F-065, RR20F-068, RR20F-066, RR20F-013. Sums the capital expenses across all funding sources +--- In 2022 the data is in a different format than 2023 **and onwards**. +--- Only needed for the 2023 error checking (to compare to "last year"). In 2024 you don't need 2022 data.
+------- + +WITH longform_2023 AS ( + SELECT + organization, + api_report_period AS fiscal_year, + operations_expended + capital_expended AS total_expended, + REPLACE( + REPLACE( + REPLACE(item, 'FTA Formula Grants for Rural Areas (ยง5311)', 'FTA_Formula_Grants_for_Rural_Areas_5311'), + 'Other Directly Generated Funds', 'Other_Directly_Generated_Funds'), + 'Local Funds', 'Local_Funds') as item + FROM {{ ref('stg_ntd_2023_rr20_rural') }} + WHERE item LIKE "%Directly Generated Funds%" OR + item LIKE "%Formula Grants for Rural Areas%" OR + item LIKE "Local Funds" +), +wide_2023 AS ( + SELECT * FROM + (SELECT * FROM longform_2023) + PIVOT(AVG(total_expended) FOR item IN ('FTA_Formula_Grants_for_Rural_Areas_5311', 'Other_Directly_Generated_Funds', 'Local_Funds')) + ORDER BY organization +), +data_2022 AS ( + SELECT Organization_Legal_Name as organization, + Fiscal_Year as fiscal_year, + SUM(Other_Directly_Generated_Funds) as Other_Directly_Generated_Funds_2022, + SUM(FTA_Formula_Grants_for_Rural_Areas_5311) as FTA_Formula_Grants_for_Rural_Areas_5311_2022, + Null as Local_Funds_2022 + FROM {{ ref('stg_ntd_2022_rr20_financial') }} + GROUP BY 1,2 + ORDER BY organization +) + +select wide_2023.organization, + wide_2023.FTA_Formula_Grants_for_Rural_Areas_5311 as FTA_Formula_Grants_for_Rural_Areas_5311_2023, + wide_2023.Other_Directly_Generated_Funds as Other_Directly_Generated_Funds_2023, + wide_2023.Local_Funds as Local_Funds_2023, + data_2022.FTA_Formula_Grants_for_Rural_Areas_5311_2022, + data_2022.Other_Directly_Generated_Funds_2022, + data_2022.Local_Funds_2022 +from wide_2023 +FULL OUTER JOIN data_2022 + ON wide_2023.organization = data_2022.organization +ORDER BY organization diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_total_exp.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_total_exp.sql new file mode 100644 index 0000000000..06211cc877 --- /dev/null +++ 
b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_total_exp.sql @@ -0,0 +1,54 @@ +------ +--- Compiles data for RR-20 Financial checks on total amounts (operating and capital) +--- into one table for downstream validation checks. "Prior year" data not needed +--- NTD error ID #s RR20F-001OA, RR20F-001C, RR20F-182 +------ + +WITH total_operations_exp_2023 as( + select organization, + api_report_period as fiscal_year, + sum(operations_expended) as Total_Annual_Op_Expenses_by_Mode + from {{ ref('stg_ntd_2023_rr20_rural') }} + WHERE css_class = "expense" + group by organization, api_report_period +), +total_capital_exp_bymode_2023 as ( + select organization, + api_report_period as fiscal_year, + sum(capital_expended) as Total_Annual_Cap_Expenses_byMode + from {{ ref('stg_ntd_2023_rr20_rural') }} + WHERE css_class = "expense" + group by organization, api_report_period +), +total_operations_rev_2023 as ( + select organization, + api_report_period as fiscal_year, + sum(operations_expended) as Total_Annual_Op_Revenues_Expended + from {{ ref('stg_ntd_2023_rr20_rural') }} + WHERE css_class = "revenue" + group by organization, api_report_period +), +total_cap_exp_byfunds_2023 as ( + select organization, + api_report_period as fiscal_year, + sum(capital_expended) as Total_Annual_Cap_Expenses_byFunds + from {{ ref('stg_ntd_2023_rr20_rural') }} + WHERE css_class = "revenue" + group by organization, api_report_period +) + +SELECT + total_operations_exp_2023.*, + total_capital_exp_bymode_2023.Total_Annual_Cap_Expenses_byMode, + total_operations_rev_2023.Total_Annual_Op_Revenues_Expended, + total_cap_exp_byfunds_2023.Total_Annual_Cap_Expenses_byFunds +FROM total_operations_exp_2023 +FULL OUTER JOIN total_capital_exp_bymode_2023 + ON total_operations_exp_2023.organization = total_capital_exp_bymode_2023.organization + AND total_operations_exp_2023.fiscal_year = total_capital_exp_bymode_2023.fiscal_year +FULL OUTER JOIN total_operations_rev_2023 + ON 
total_operations_exp_2023.organization = total_operations_rev_2023.organization + AND total_operations_exp_2023.fiscal_year = total_operations_rev_2023.fiscal_year +FULL OUTER JOIN total_cap_exp_byfunds_2023 + ON total_operations_exp_2023.organization = total_cap_exp_byfunds_2023.organization + AND total_operations_exp_2023.fiscal_year = total_cap_exp_byfunds_2023.fiscal_year diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql new file mode 100644 index 0000000000..5b523ea685 --- /dev/null +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql @@ -0,0 +1,39 @@ + +WITH rr20f_0010a as ( + select + organization, + "RR20F-001OA: equal totalsfor operating expenses" as name_of_check, + CASE WHEN (ROUND(Total_Annual_Op_Revenues_Expended,0) != ROUND(Total_Annual_Op_Expenses_byMode,0)) THEN "Fail" + ELSE "Pass" + END as check_status, + CASE WHEN check_status = "Fail" THEN "Total_Annual_Revenues_Expended should, but does not, equal Total_Annual_Expenses_by_Mode. Please provide a narrative justification." 
+ WHEN check_status = "Pass" THEN "" + ELSE NULL + END as description, + COALESCE("Total_Annual_Revenues_Expended = $", ROUND(Total_Annual_Op_Revenues_Expended,0), + ",Total_Annual_Expenses_by_Mode = $", ROUND(Total_Annual_Op_Expenses_byMode,0)) as value_checked, + CURRENT_TIMESTAMP() AS date_checked + FROM {{ ref('int_ntd_rr20_financial_total_exp') }} +), +rr20f_001c as( + select + organization, + "RR20F-001C: equal totals for capital expenses by mode and funding source expenditures" as name_of_check, + CASE WHEN (ROUND(Total_Annual_Cap_Expenses_byMode,0) != ROUND(Total_Annual_Cap_Expenses_byFunds,0)) THEN "Fail" + ELSE "Pass" + END as check_status, + CASE WHEN check_status = "Fail" THEN "The sum of Total Expenses for all modes for Uses of Capital does not equal the sum of all values entered for Directly Generated, Non-Federal and Federal Government Funds for Uses of Capital. Please revise or explain." + WHEN check_status = "Pass" THEN "" + ELSE NULL + END as description, + COALESCE("Total_Annual_Cap_Expenses_byMode = $", ROUND(Total_Annual_Cap_Expenses_byMode,0), + ",Total_Annual_Cap_Expenses_byFunds = $", ROUND(Total_Annual_Cap_Expenses_byFunds,0)) as value_checked, + CURRENT_TIMESTAMP() AS date_checked + FROM {{ ref('int_ntd_rr20_financial_total_exp') }} +) + +SELECT * FROM rr20f_0010a + +UNION ALL + +SELECT * FROM rr20f_001c \ No newline at end of file diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_funds_checks.sql b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_funds_checks.sql new file mode 100644 index 0000000000..1e4c9ed6ca --- /dev/null +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_funds_checks.sql @@ -0,0 +1,130 @@ +--- We do identical CASE WHEN clauses in each CTE. 
The results determine 2 different column values but one can only specify 1 col/statement + +WITH rr20f_070 as ( + select + organization, + "RR20F-070: 5311 Funds not reported" as name_of_check, + CASE WHEN ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023) = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL + THEN "Fail" + ELSE "Pass" + END as check_status, + CASE WHEN ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023) = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL + THEN "The ยง5311 program is not listed as a revenue source in your report, Please double check and provide a narrative justification." + ELSE "" + END AS description, + CONCAT("2023 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS STRING)) as value_checked, + CURRENT_TIMESTAMP() AS date_checked + from {{ ref('int_ntd_rr20_financial_specific_funds') }} +), +rr20f_066 as ( + select + organization, + "RR20F-066: change from zero" as name_of_check, + CASE WHEN ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL) AND + (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL)) + OR + ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) AND + (FTA_Formula_Grants_for_Rural_Areas_5311_2022 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NULL)) + THEN "Fail" + ELSE "Pass" + END as check_status, + CASE WHEN ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL) AND + (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL)) + OR + ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) AND + (FTA_Formula_Grants_for_Rural_Areas_5311_2022 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NULL)) + THEN 
"FTA_Formula_Grants_for_Rural_Areas_5311 funding changed either from or to zero compared to last year. Please provide a narrative justification." + ELSE "" + END AS description, + CONCAT("2022 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2022,0) AS STRING), + "2023 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS STRING)) as value_checked, + CURRENT_TIMESTAMP() AS date_checked + from {{ ref('int_ntd_rr20_financial_specific_funds') }} +), +rr20f_065 as ( + select + organization, + "RR20F-065: 5311 Funds same value" as name_of_check, + CASE WHEN (FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) AND + (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL) AND + (FTA_Formula_Grants_for_Rural_Areas_5311_2023 = FTA_Formula_Grants_for_Rural_Areas_5311_2022) + THEN "Fail" + ELSE "Pass" + END as check_status, + CASE WHEN (FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) AND + (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL) AND + (FTA_Formula_Grants_for_Rural_Areas_5311_2023 = FTA_Formula_Grants_for_Rural_Areas_5311_2022) + THEN "You have identical values for FTA_Formula_Grants_for_Rural_Areas_5311 funding in 2022 and 2023, which is unusual. Please provide a narrative justification." 
+ ELSE "" + END AS description, + CONCAT("2022 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2022,0) AS STRING), + "2023 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS STRING)) as value_checked, + CURRENT_TIMESTAMP() AS date_checked + from {{ ref('int_ntd_rr20_financial_specific_funds') }} +), +rr20f_013 as ( + select + organization, + "RR20F-013: Other Directly Generated Funds same value" as name_of_check, + CASE WHEN (Other_Directly_Generated_Funds_2023 != 0 OR Other_Directly_Generated_Funds_2023 IS NOT NULL) AND + (Other_Directly_Generated_Funds_2022 != 0 OR Other_Directly_Generated_Funds_2022 IS NOT NULL) AND + (Other_Directly_Generated_Funds_2023 = Other_Directly_Generated_Funds_2022) + THEN "Fail" + ELSE "Pass" + END as check_status, + CASE WHEN (Other_Directly_Generated_Funds_2023 != 0 OR Other_Directly_Generated_Funds_2023 IS NOT NULL) AND + (Other_Directly_Generated_Funds_2022 != 0 OR Other_Directly_Generated_Funds_2022 IS NOT NULL) AND + (Other_Directly_Generated_Funds_2023 = Other_Directly_Generated_Funds_2022) + THEN "You have identical values for Other_Directly_Generated_Funds funding in 2022 and 2023, which is unusual. Please provide a narrative justification." 
+ ELSE "" + END AS description, + CONCAT("2022 = ", CAST(ROUND(Other_Directly_Generated_Funds_2022,0) AS STRING), + "2023 = ", CAST(ROUND(Other_Directly_Generated_Funds_2023,0) AS STRING)) as value_checked, + CURRENT_TIMESTAMP() AS date_checked + from {{ ref('int_ntd_rr20_financial_specific_funds') }} +), +rr20f_068 as ( + select + organization, + "RR20F-068: 5311 Funds rounded to thousand" as name_of_check, + CASE WHEN MOD(CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS INT),1000) = 0 AND FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL + THEN "Fail" + ELSE "Pass" + END as check_status, + CASE WHEN MOD(CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS INT),1000) = 0 AND FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL + THEN "FTA_Formula_Grants_for_Rural_Areas_5311 are rounded to the nearest thousand, but should be reported as exact values. Please double check and provide a narrative justification." + ELSE "" + END AS description, + CONCAT("2023 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS STRING)) as value_checked, + CURRENT_TIMESTAMP() AS date_checked + from {{ ref('int_ntd_rr20_financial_specific_funds') }} +), +rr20f_024 as ( + select + organization, + "RR20F-024: Local Funds rounded to thousand" as name_of_check, + CASE WHEN MOD(CAST(ROUND(Local_Funds_2023) AS INT),1000) = 0 AND Local_Funds_2023 IS NOT NULL + THEN "Fail" + ELSE "Pass" + END as check_status, + CASE WHEN MOD(CAST(ROUND(Local_Funds_2023) AS INT),1000) = 0 AND Local_Funds_2023 IS NOT NULL + THEN "Local Funds are rounded to the nearest thousand, but should be reported as exact values. Please double check and provide a narrative justification." 
+ ELSE "" + END AS description, + CONCAT("2023 = ", CAST(ROUND(Local_Funds_2023) AS STRING)) as value_checked, + CURRENT_TIMESTAMP() AS date_checked + from {{ ref('int_ntd_rr20_financial_specific_funds') }} +) + +SELECT * FROM rr20f_070 +UNION ALL +SELECT * FROM rr20f_066 +UNION ALL +SELECT * FROM rr20f_065 +UNION ALL +SELECT * FROM rr20f_013 +UNION ALL +SELECT * FROM rr20f_068 +UNION ALL +SELECT * FROM rr20f_024 +ORDER BY organization From e5c28f7e3fdb8dc50624f50a836daf9d713d6dd1 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Fri, 1 Dec 2023 14:45:17 -0800 Subject: [PATCH 07/15] add VOMS check --- .../int_ntd_a30_voms_vins_totals.sql | 26 ++++++++++++++++++ .../ntd_validation/fct_ntd_a30_vomscheck.sql | 23 ++++++++++++++++ .../fct_ntd_rr20_equal_totals_check.sql | 27 ++++++++++--------- .../stg_ntd_2023_a30_assetandresourceinfo.sql | 27 +++++++++++++++++++ 4 files changed, 91 insertions(+), 12 deletions(-) create mode 100644 warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql create mode 100644 warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql create mode 100644 warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql new file mode 100644 index 0000000000..c0a960859e --- /dev/null +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql @@ -0,0 +1,26 @@ +--- get the # of active VINS in the inventory - DON'T HAVE +--- get the # of VOMS in the rr-20 +-- get the # of vins in the A30 + +with voms_rr20 as ( + select organization, + fiscal_year, + AVG(VOMX) as rr20_voms + FROM {{ ref('int_ntd_rr20_service_alldata') }} + GROUP BY organization, fiscal_year +), + +vins_a30 as ( + SELECT organization, + api_report_period as fiscal_year, + COUNT (DISTINCT VIN) as a30_vin_n + FROM {{ 
ref('stg_ntd_2023_a30_assetandresourceinfo') }} + GROUP BY organization, fiscal_year +) + +select voms_rr20.*, vins_a30.a30_vin_n +FROM voms_rr20 +FULL OUTER JOIN vins_a30 + ON voms_rr20.organization = vins_a30.organization + AND voms_rr20.fiscal_year = vins_a30.fiscal_year +ORDER BY organization, fiscal_year \ No newline at end of file diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql b/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql new file mode 100644 index 0000000000..91d6cb20e8 --- /dev/null +++ b/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql @@ -0,0 +1,23 @@ +--- Since we don't have the Revenue Inventory table from Black Cat yet, we cannot do the other checks in the file voms_inventory_check.py. +--- When we do get that data, we can add them here in their own CTEs, one per check, and then UNION ALL the CTEs together. + +WITH rr20f_180 as ( + SELECT organization, + "RR20F-180: VOMS across forms" as name_of_check, + CASE WHEN ROUND(rr20_voms, 1) > ROUND(a30_vin_n, 1) + THEN "Fail" + ELSE "Pass" + END as check_status, + CASE WHEN ROUND(rr20_voms, 1) > ROUND(a30_vin_n, 1) + THEN "Total VOMS is greater than total A-30 vehicles reported. Please clarify." + ELSE "VOMS & A-30 vehicles reported are equal to and/or lower than active inventory." 
+ END as description, + CONCAT("RR-20 VOMS = ", CAST(ROUND(rr20_voms, 1) AS STRING), + "# A-30 VINs = ", CAST(ROUND(a30_vin_n, 1) AS STRING)) AS value_checked, + CURRENT_TIMESTAMP() AS date_checked + FROM {{ ref('int_ntd_a30_voms_vins_totals') }} +) + +SELECT * from rr20f_180 + + diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql index 5b523ea685..06a00c18d7 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql @@ -1,17 +1,19 @@ +--- We do identical CASE WHEN clauses in each CTE. The results determine 2 different column values but one can only specify 1 col/statement WITH rr20f_0010a as ( select organization, "RR20F-001OA: equal totalsfor operating expenses" as name_of_check, - CASE WHEN (ROUND(Total_Annual_Op_Revenues_Expended,0) != ROUND(Total_Annual_Op_Expenses_byMode,0)) THEN "Fail" + CASE WHEN (ROUND(Total_Annual_Op_Revenues_Expended,0) != ROUND(Total_Annual_Op_Expenses_by_Mode,0)) + THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN check_status = "Fail" THEN "Total_Annual_Revenues_Expended should, but does not, equal Total_Annual_Expenses_by_Mode. Please provide a narrative justification." - WHEN check_status = "Pass" THEN "" - ELSE NULL + CASE WHEN (ROUND(Total_Annual_Op_Revenues_Expended,0) != ROUND(Total_Annual_Op_Expenses_by_Mode,0)) + THEN "Total_Annual_Revenues_Expended should, but does not, equal Total_Annual_Expenses_by_Mode. Please provide a narrative justification." 
+ ELSE "" END as description, - COALESCE("Total_Annual_Revenues_Expended = $", ROUND(Total_Annual_Op_Revenues_Expended,0), - ",Total_Annual_Expenses_by_Mode = $", ROUND(Total_Annual_Op_Expenses_byMode,0)) as value_checked, + CONCAT("Total_Annual_Revenues_Expended = $", CAST(ROUND(Total_Annual_Op_Revenues_Expended,0) AS STRING), + ",Total_Annual_Expenses_by_Mode = $", CAST(ROUND(Total_Annual_Op_Expenses_by_Mode,0) AS STRING)) as value_checked, CURRENT_TIMESTAMP() AS date_checked FROM {{ ref('int_ntd_rr20_financial_total_exp') }} ), @@ -19,15 +21,16 @@ rr20f_001c as( select organization, "RR20F-001C: equal totals for capital expenses by mode and funding source expenditures" as name_of_check, - CASE WHEN (ROUND(Total_Annual_Cap_Expenses_byMode,0) != ROUND(Total_Annual_Cap_Expenses_byFunds,0)) THEN "Fail" + CASE WHEN (ROUND(Total_Annual_Cap_Expenses_byMode,0) != ROUND(Total_Annual_Cap_Expenses_byFunds,0)) + THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN check_status = "Fail" THEN "The sum of Total Expenses for all modes for Uses of Capital does not equal the sum of all values entered for Directly Generated, Non-Federal and Federal Government Funds for Uses of Capital. Please revise or explain." - WHEN check_status = "Pass" THEN "" - ELSE NULL + CASE WHEN (ROUND(Total_Annual_Cap_Expenses_byMode,0) != ROUND(Total_Annual_Cap_Expenses_byFunds,0)) + THEN "The sum of Total Expenses for all modes for Uses of Capital does not equal the sum of all values entered for Directly Generated, Non-Federal and Federal Government Funds for Uses of Capital. Please revise or explain." 
+ ELSE "" END as description, - COALESCE("Total_Annual_Cap_Expenses_byMode = $", ROUND(Total_Annual_Cap_Expenses_byMode,0), - ",Total_Annual_Cap_Expenses_byFunds = $", ROUND(Total_Annual_Cap_Expenses_byFunds,0)) as value_checked, + CONCAT("Total_Annual_Cap_Expenses_byMode = $", CAST(ROUND(Total_Annual_Cap_Expenses_byMode,0) AS STRING), + ",Total_Annual_Cap_Expenses_byFunds = $", CAST(ROUND(Total_Annual_Cap_Expenses_byFunds,0) AS STRING)) as value_checked, CURRENT_TIMESTAMP() AS date_checked FROM {{ ref('int_ntd_rr20_financial_total_exp') }} ) diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql new file mode 100644 index 0000000000..d6e9965e23 --- /dev/null +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql @@ -0,0 +1,27 @@ +SELECT + organization, + reportstatus as api_report_status, + TIMESTAMP_MILLIS(reportlastmodifieddate) as api_report_last_modified_date, + reportperiod as api_report_period, + a30.id as id, + a30.VehicleId as vehicle_id, + a30.NTDID as ntd_id, + a30.VehicleLength as vehicle_length, + a30.FuelType as fuel_type, + a30.FundSource as fund_source, + a30.ReportId as report_id, + a30.AverageEstimatedServiceYearsWhenNew as average_estimated_service_years_when_new, + a30.VehicleStatus as vehicle_status, + a30.Vin as vin, + a30.ADAAccess as ada_access, + a30.VehicleType as vehicle_type, + a30.AverageExpirationYearsWhenNew as average_expiration_years_when_new, + a30.VehicleYear as vehicle_year, + a30.UsefulLifeYearsRemaining as useful_life_years_remaining, + a30.SeatingCapacity as seating_capacity, + a30.OwnershipType as ownership_type, + a30.ModesOperatedDisplayText as modes_operated_display_text, + a30.ModesOperatedFullText as modes_operated_full_text, + a30.LastModifiedDate as last_modified_date +FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} +, UNNEST 
(`ntdassetandresourceinfo_data`) as `a30` \ No newline at end of file From b3b2557830ac792e17e373441eba92d4ff6d8470 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Fri, 1 Dec 2023 15:24:33 -0800 Subject: [PATCH 08/15] formatting --- .../ntd_validation/int_ntd_a30_voms_vins_totals.sql | 2 +- .../ntd_validation/int_ntd_rr20_service_alldata.sql | 1 - .../intermediate/ntd_validation/int_ntd_rr20_service_ratios.py | 2 +- warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql | 2 -- .../mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql | 2 +- .../staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql | 2 +- .../staging/ntd_validation/stg_ntd_2022_rr20_financial.sql | 2 +- .../models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql | 2 +- warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql | 2 +- .../ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql | 2 +- .../models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql | 2 +- .../staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql | 2 +- .../models/staging/ntd_validation/stg_ntd_subrecipients.sql | 2 +- 13 files changed, 11 insertions(+), 14 deletions(-) diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql index c0a960859e..350fd6315c 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql @@ -23,4 +23,4 @@ FROM voms_rr20 FULL OUTER JOIN vins_a30 ON voms_rr20.organization = vins_a30.organization AND voms_rr20.fiscal_year = vins_a30.fiscal_year -ORDER BY organization, fiscal_year \ No newline at end of file +ORDER BY organization, fiscal_year diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql index 8c21daf559..cd44a46c2f 100644 --- 
a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql @@ -74,4 +74,3 @@ select * FROM all_2022 UNION ALL select * from data_2023 - diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py index 24f89bc8b2..ae44deb762 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py @@ -68,4 +68,4 @@ def model(dbt, session): allyears = make_ratio_cols(allyears, 'Annual_VRM', 'Annual_VRH', 'rev_speed', logger, operation = "mean") allyears = make_ratio_cols(allyears, 'Annual_UPT', 'Annual_VRH', 'trips_per_hr', logger, operation = "mean") - return allyears \ No newline at end of file + return allyears diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql b/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql index 91d6cb20e8..583a340914 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql +++ b/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql @@ -19,5 +19,3 @@ WITH rr20f_180 as ( ) SELECT * from rr20f_180 - - diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql index 06a00c18d7..7bc169dd25 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql @@ -39,4 +39,4 @@ SELECT * FROM rr20f_0010a UNION ALL -SELECT * FROM rr20f_001c \ No newline at end of file +SELECT * FROM rr20f_001c diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql index ea678124c7..c2fbca6ae8 
100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql @@ -2,4 +2,4 @@ --- We pull these tables in to use them in later int and fct models SELECT * -FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_expenses_by_mode` \ No newline at end of file +FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_expenses_by_mode` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql index 0409fbc451..5f465071db 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql @@ -2,4 +2,4 @@ --- We pull these tables in to use them in later int and fct models SELECT * -FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_financials__2` \ No newline at end of file +FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_financials__2` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql index 919ce31487..64c17c9b43 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql @@ -2,4 +2,4 @@ --- We pull these tables in to use them in later int and fct models SELECT * -FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_service_data` \ No newline at end of file +FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_service_data` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql index 2cda0460f6..6ecc277392 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql @@ -15,4 +15,4 @@ SELECT a10.DOLeasedFromPrivateEntity as 
do_leased_from_private_entity, a10.LastModifiedDate as last_modified_date FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} -, UNNEST (`ntdreportingstationsandmaintenance_data`) as `a10` \ No newline at end of file +, UNNEST (`ntdreportingstationsandmaintenance_data`) as `a10` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql index d6e9965e23..223c2e104a 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql @@ -24,4 +24,4 @@ SELECT a30.ModesOperatedFullText as modes_operated_full_text, a30.LastModifiedDate as last_modified_date FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} -, UNNEST (`ntdassetandresourceinfo_data`) as `a30` \ No newline at end of file +, UNNEST (`ntdassetandresourceinfo_data`) as `a30` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql index f68ef04afb..aeaec900d7 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql @@ -20,4 +20,4 @@ SELECT ntdreportingrr20_rural_data.Quantity as quantity, ntdreportingrr20_rural_data.LastModifiedDate as last_modified_date FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} -, UNNEST (`ntdreportingrr20_rural_data`) as `ntdreportingrr20_rural_data` \ No newline at end of file +, UNNEST (`ntdreportingrr20_rural_data`) as `ntdreportingrr20_rural_data` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql index 7b21fa5f34..003c00b758 100644 --- 
a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql @@ -13,4 +13,4 @@ SELECT ntdreportingrr20_urban_tribal_data.LastModifiedDate as last_modified_date FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} -- `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` -, UNNEST (`ntdreportingrr20_urban_tribal_data`) as `ntdreportingrr20_urban_tribal_data` \ No newline at end of file +, UNNEST (`ntdreportingrr20_urban_tribal_data`) as `ntdreportingrr20_urban_tribal_data` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql b/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql index 50c5208254..b935ffbd64 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql @@ -1,3 +1,3 @@ SELECT Organization as organization -FROM blackcat_raw.2023_organizations \ No newline at end of file +FROM blackcat_raw.2023_organizations From 7b3c7f6327a1dd0bc29edd78b8a5d23743ac6f0b Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Mon, 4 Dec 2023 11:09:44 -0800 Subject: [PATCH 09/15] metadata for intermediate models --- .../int_ntd_rr20_financial_fare_revenues.sql | 2 +- .../ntd_validation/int_ntd_validation.yml | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql index 810c14a8dc..d29fc24b6d 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql @@ -1,4 +1,4 @@ --- need fare rev and upt for each year. didn't write check correctly the first time +-- need fare rev and upt for each year. 
WITH fare_rev_2023 as ( select diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml b/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml index d5affd54f9..7ce803848c 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml @@ -1,13 +1,27 @@ version: 2 models: - - name: int_rr20_financial + - name: int_ntd_rr20_financial_fare_revenues description: | - the RR-20 data that pertains to financial reporting. + Setting up the RR-20 data for comparing fare revenues to previous year # tests: # - dbt_utils.expression_is_true: # expression: 'status != {{ guidelines_to_be_assessed_status() }}' # columns: + - name: int_ntd_rr20_financial_specific_funds + description: | + Setting up the RR-20 data for comparing specific funding sources - the 5311 funds, and Other directly generated funds + For NTD validation error ID #s RR20F-070, RR20F-065, RR20F-068, RR20F-066, RR20F-013 + - name: int_ntd_rr20_financial_total_exp + description: | + Setting up the RR-20 data for comparing totals, for operating and capital expenses, reported in different ares of the RR-20 + For NTD validation error ID #s RR20F-001OA, RR20F-001C, RR20F-182 + - name: int_ntd_rr20_service_alldata + description: | + Combines 2023 and 2022 data in preparation for doing NTD validation checks. + The 2022 data was *not* from the API and so formatted differently + We are *assuming* that data in 2024 and onwards will be the same format as 2023 + If you get errors in 2024, check which columns may differ and read errors carefully. 
- name: int_ntd_rr20_service_ratios description: | makes ratios for validation checks From bc3cf8a13604993c0c85733e7dd718a7819bde38 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Mon, 4 Dec 2023 11:48:30 -0800 Subject: [PATCH 10/15] formatting --- .../ntd_validation/_mart_ntd_validation.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/warehouse/models/mart/ntd_validation/_mart_ntd_validation.yml b/warehouse/models/mart/ntd_validation/_mart_ntd_validation.yml index 43ae3000c4..3a782c0322 100644 --- a/warehouse/models/mart/ntd_validation/_mart_ntd_validation.yml +++ b/warehouse/models/mart/ntd_validation/_mart_ntd_validation.yml @@ -1,4 +1,20 @@ version: 2 models: - - name: fct_ntd_rr20_service_checks \ No newline at end of file + - name: fct_ntd_rr20_service_checks + description: | + Runs validation checks on the RR-20 service data. Source data is int_ntd_rr20_service_ratios. + This model is still in python but should be converted to SQL as time allows. + - name: fct_ntd_a30_vomscheck + description: | + Runs various checks on VOMS data submitted to NTD, that are also in the file voms_inventory_check.py. + Since we don't have the Revenue Inventory table from Black Cat yet, we cannot do all of the checks. + TO DO: add when vehicle inventory becomes available. + - name: fct_ntd_rr20_equal_totals_check + description: | + Runs various validation checks that compare total funding amounts reported in different places. + For NTD validation error ID #s rr20f_0010a, rr20f_001c + - name: fct_ntd_rr20_funds_checks + description: | + Runs various validation checks on specific RR-20 funding source data. 
+ For NTD validation error ID #s rr20f_070, rr20f_066, rr20f_065, rr20f_013, rr20f_068, rr20f_024 From 0abdfbf401502ebd6d188b8b40508ce740f52f35 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Wed, 6 Dec 2023 15:39:03 -0800 Subject: [PATCH 11/15] formatting --- .../external_table_all_ntdreports.yml | 397 +++++++++++++++++- ..._for_ntd.yml => all_submitted_for_ntd.yml} | 6 +- .../publish_validation_report.py | 75 ---- airflow/plugins/operators/blackcat_to_gcs.py | 87 ++-- 4 files changed, 440 insertions(+), 125 deletions(-) rename airflow/dags/ntd_report_from_blackcat/{all_2023_submitted_for_ntd.yml => all_submitted_for_ntd.yml} (62%) delete mode 100644 airflow/dags/ntd_report_publish_validation/publish_validation_report.py diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml index ad5fe5517c..a6f9c41dd1 100644 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml @@ -3,17 +3,398 @@ bucket: gs://calitp-ntd-report-validation prefix_bucket: false post_hook: | SELECT * - FROM `{{ get_project_id() }}`.external_blackcat.all_2023_ntdreports + FROM `{{ get_project_id() }}`.external_blackcat.all_ntdreports LIMIT 1; source_objects: - - "all_2023_NTDReporting/*.jsonl.gz" -destination_project_dataset_table: "external_blackcat.all_2023_ntdreports" + - "all_NTDReporting/*.jsonl.gz" +destination_project_dataset_table: "external_blackcat.all_ntdreports" source_format: NEWLINE_DELIMITED_JSON use_bq_client: true hive_options: - mode: AUTO + mode: CUSTOM require_partition_filter: false - source_uri_prefix: "all_2023_NTDReporting/" - - - + source_uri_prefix: "all_NTDReporting/{year:STRING}/{dt:DATE}/{ts:TIMESTAMP}/" +schema_fields: + - name: reportid + type: INTEGER + - name: organization + type: STRING 
+ - name: reportperiod + type: INTEGER + - name: reportstatus + type: STRING + - name: reportlastmodifieddate + type: TIMESTAMP + - name: ntdreportingstationsandmaintenance_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: ServiceMode + type: STRING + - name: ReportId + type: INTEGER + - name: PTOwnedByServiceProvider + type: FLOAT + - name: PTOwnedByPublicAgency + type: FLOAT + - name: PTLeasedByServiceProvider + type: FLOAT + - name: PTLeasedByPublicAgency + type: FLOAT + - name: DOOwned + type: FLOAT + - name: DOLeasedByPublicAgency + type: FLOAT + - name: DOLeasedFromPrivateEntity + type: FLOAT + - name: LastModifiedDate + type: TIMESTAMP + - name: ntdtransitassetmanagementa15_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: FacilityId + type: INTEGER + - name: ReportId + type: INTEGER + - name: FacilityName + type: STRING + - name: PrimaryMode + type: STRING + - name: FacilityClass + type: STRING + - name: FacilityType + type: STRING + - name: YearBuilt + type: INTEGER + - name: Size + type: STRING + - name: DOTCapitalResponsibility + type: FLOAT + - name: OrganizationCapitalResponsibility + type: FLOAT + - name: ConditionAssessment + type: FLOAT + - name: ConditionAssessment + type: TIMESTAMP + - name: SectionOfLargerFacility + type: BOOLEAN + - name: Latitude + type: FLOAT + - name: LatitudeDirection + type: STRING + - name: Longitude + type: FLOAT + - name: LongitudeDirection + type: STRING + - name: SecondaryMode + type: STRING + - name: PrivateMode + type: STRING + - name: ntdassetandresourceinfo_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: VehicleId + type: INTEGER + - name: ReportId + type: INTEGER + - name: VehicleStatus + type: STRING + - name: Vin + type: INTEGER + - name: NTDID + type: STRING + - name: ADAAccess + type: BOOLEAN + - name: VehicleType + type: STRING + - name: FuelType + type: STRING + - name: FundSource + type: STRING + - name: AverageEstimatedServiceYearsWhenNew + type: 
INTEGER + - name: AverageExpirationYearsWhenNew + type: INTEGER + - name: VehicleYear + type: INTEGER + - name: UsefulLifeYearsRemaining + type: INTEGER + - name: VehicleLength + type: FLOAT + - name: SeatingCapacity + type: INTEGER + - name: OwnershipType + type: STRING + - name: ModesOperatedDisplayText + type: STRING + - name: ModesOperatedFullText + type: STRING + - name: LastModifiedDate + type: TIMESTAMP + - name: ntdreportingp10_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: ReportId + type: INTEGER + - name: OrgId + type: INTEGER + - name: UserId + type: STRING + - name: FirstName + type: STRING + - name: LastName + type: STRING + - name: FullName + type: RECORD + fields: + - name: id + type: INTEGER + - name: Text + type: STRING + - name: Value + type: STRING + - name: Group + type: STRING + - name: BoolValue + type: BOOLEAN + - name: PrimaryPhone + type: STRING + - name: Email + type: STRING + - name: LastModifiedDate + type: TIMESTAMP + - name: ntdreportingp20_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: ReportId + type: INTEGER + - name: ServiceMode + type: STRING + - name: TypeOfService + type: STRING + - name: CommitmentDate + type: STRING + - name: StartDate + type: STRING + - name: EndDate + type: STRING + - name: LastModifiedDate + type: TIMESTAMP + - name: ntdreportingp50_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: ReportId + type: INTEGER + - name: Mode + type: RECORD + fields: + - name: id + type: STRING + - name: Text + type: STRING + - name: Value + type: STRING + - name: Group + type: STRING + - name: BoolValue + type: BOOLEAN + - name: Type + type: RECORD + fields: + - name: id + type: STRING + - name: Text + type: STRING + - name: Value + type: STRING + - name: Group + type: STRING + - name: BoolValue + type: BOOLEAN + - name: WebLink + type: STRING + - name: FilePath + type: STRING + - name: LastModifiedDate + type: TIMESTAMP + - name: ntdreportinga35_data + type: 
RECORD + fields: + - name: Id + type: INTEGER + - name: ReportId + type: INTEGER + - name: EquipmentName + type: STRING + - name: EquipmentId + type: INTEGER + - name: VehicleType + type: STRING + - name: PrimaryMode + type: STRING + - name: SecondaryMode + type: STRING + - name: TotalVehicles + type: STRING + - name: UsefulLifeBenchmark + type: INTEGER + - name: YearOfManufacture + type: INTEGER + - name: TransitAgencyCapitalResponsibility + type: FLOAT + - name: EstimatedCost + type: FLOAT + - name: YearDollarsEstimatedCost + type: INTEGER + - name: UsefulLifeYearsBenchMark + type: INTEGER + - name: UsefulLifeYearsRemaining + type: INTEGER + - name: LastModifiedDate + type: TIMESTAMP + - name: ntdreportingrr20_intercity_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: ItemId + type: INTEGER + - name: ReportId + type: INTEGER + - name: Item + type: STRING + - name: Type + type: STRING + - name: OperationsExpended + type: FLOAT + - name: CapitalExpended + type: FLOAT + - name: Description + type: STRING + - name: AnnualVehicleRevMiles + type: INTEGER + - name: RegularUnlinkedPassengerTrips + type: INTEGER + - name: LastModifiedDate + type: TIMESTAMP + - name: ntdreportingrr20_rural_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: ReportId + type: INTEGER + - name: Item + type: STRING + - name: Revenue + type: STRING + - name: Type + type: STRING + - name: CssClass + type: STRING + - name: OperationsExpended + type: FLOAT + - name: CapitalExpended + type: FLOAT + - name: Description + type: STRING + - name: AnnualVehicleRevMiles + type: INTEGER + - name: AnnualVehicleRevHours + type: INTEGER + - name: AnnualUnlinkedPassTrips + type: INTEGER + - name: AnnualVehicleMaxService + type: INTEGER + - name: SponsoredServiceUPT + type: INTEGER + - name: Quantity + type: INTEGER + - name: LastModifiedDate + type: TIMESTAMP + - name: ntdreportingrr20_urban_tribal_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: ItemId 
+ type: INTEGER + - name: ReportId + type: INTEGER + - name: Item + type: STRING + - name: OperationsExpended + type: FLOAT + - name: CapitalExpended + type: FLOAT + - name: Description + type: STRING + - name: LastModifiedDate + type: TIMESTAMP + - name: ntdreportingtamnarrative_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: ReportId + type: INTEGER + - name: Type + type: STRING + - name: Category + type: STRING + - name: VehiclesInAssetClass + type: INTEGER + - name: VehiclesExceededULBTAMPlan + type: INTEGER + - name: TAMPlanGoalsDescription + type: STRING + - name: VehiclesToBeRetiredBeyondULB + type: INTEGER + - name: VehiclesPastULBInTAM + type: INTEGER + - name: LastModifiedDate + type: TIMESTAMP + - name: ss60_data + type: RECORD + fields: + - name: Id + type: INTEGER + - name: ItemId + type: INTEGER + - name: ReportId + type: INTEGER + - name: Item + type: STRING + - name: Type + type: STRING + - name: CssClass + type: STRING + - name: TransitVehicleAssualts + type: INTEGER + - name: RevenueFacilityAssualts + type: INTEGER + - name: NonRevenueFacilityAssualts + type: INTEGER + - name: OtherLocationAssualts + type: INTEGER + - name: MajorEvents + type: INTEGER + - name: Fatalities + type: INTEGER + - name: Injuries + type: INTEGER + - name: Quantity + type: STRING + - name: LastModifiedDate + type: TIMESTAMP diff --git a/airflow/dags/ntd_report_from_blackcat/all_2023_submitted_for_ntd.yml b/airflow/dags/ntd_report_from_blackcat/all_submitted_for_ntd.yml similarity index 62% rename from airflow/dags/ntd_report_from_blackcat/all_2023_submitted_for_ntd.yml rename to airflow/dags/ntd_report_from_blackcat/all_submitted_for_ntd.yml index ce0acd3528..53263c8d33 100644 --- a/airflow/dags/ntd_report_from_blackcat/all_2023_submitted_for_ntd.yml +++ b/airflow/dags/ntd_report_from_blackcat/all_submitted_for_ntd.yml @@ -1,7 +1,7 @@ operator: operators.BlackCatApiToGCSOperator bucket: "gs://calitp-ntd-report-validation" -api_url: 
"https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/2023" -api_tablename: "2023_NTDReporting" +api_url: "https://services.blackcattransit.com/api/APIModules/GetNTDReportsByYear/BCG_CA/" +api_tablename_suffix: "NTDReporting" form: "all" -bq_table_name: "2023_ntdreports" \ No newline at end of file +bq_table_name_suffix: "ntdreports" diff --git a/airflow/dags/ntd_report_publish_validation/publish_validation_report.py b/airflow/dags/ntd_report_publish_validation/publish_validation_report.py deleted file mode 100644 index 7b5b4e9e82..0000000000 --- a/airflow/dags/ntd_report_publish_validation/publish_validation_report.py +++ /dev/null @@ -1,75 +0,0 @@ -# --- -# python_callable: publish_report -# provide_context: true -# --- -from google.cloud import bigquery -import pandas as pd -import datetime -import re - -import google.auth -import google.auth.transport.requests - -import pendulum -from calitp_data_infra.storage import ( - fetch_all_in_partition, - get_fs, -) - - -def publish_report(): - client = bigquery.Client() - print("Got BG client!") - project = "cal-itp-data-infra-staging" - dataset_id = "staging_staging" - - dataset_ref = bigquery.DatasetReference(project, dataset_id) - table_ref = dataset_ref.table("fct_ntd_rr20_service_checks") - table = client.get_table(table_ref) - print("Got table!") - - df = client.list_rows(table).to_dataframe() - print("Got df from BQ!") - print(df.head()) - - - # this_year=datetime.datetime.now().year - # ## Part 1: save Excel file to GCS (for emailing to subrecipients) - # GCS_FILE_PATH_VALIDATED = f"gs://calitp-ntd-report-validation/validation_reports_{this_year}" - # with pd.ExcelWriter(f"{GCS_FILE_PATH_VALIDATED}/rr20_service_check_report_{this_date}.xlsx") as writer: - # rr20_checks.to_excel(writer, sheet_name="rr20_checks_full", index=False, startrow=2) - - # workbook = writer.book - # worksheet = writer.sheets["rr20_checks_full"] - # cell_highlight = workbook.add_format({ - # 'fg_color': 'yellow', 
- # 'bold': True, - # 'border': 1 - # }) - # report_title = "NTD Data Validation Report" - # title_format = workbook.add_format({ - # 'bold': True, - # 'valign': 'center', - # 'align': 'left', - # 'font_color': '#1c639e', - # 'font_size': 15 - # }) - # subtitle = "Reduced Reporting RR-20: Validation Warnings" - # subtitle_format = workbook.add_format({ - # 'bold': True, - # 'align': 'left', - # 'font_color': 'black', - # 'font_size': 19 - # }) - - # worksheet.write('A1', report_title, title_format) - # worksheet.merge_range('A2:C2', subtitle, subtitle_format) - # worksheet.write('G3', 'Agency Response', cell_highlight) - # worksheet.write('H3', 'Response Date', cell_highlight) - # worksheet.set_column(0, 0, 35) #col A width - # worksheet.set_column(1, 3, 22) #cols B-D width - # worksheet.set_column(4, 4, 11) #col D width - # worksheet.set_column(5, 6, 53) #col E-G width - # worksheet.freeze_panes('B4') - - diff --git a/airflow/plugins/operators/blackcat_to_gcs.py b/airflow/plugins/operators/blackcat_to_gcs.py index bf9d85d8e2..dcbbc43458 100644 --- a/airflow/plugins/operators/blackcat_to_gcs.py +++ b/airflow/plugins/operators/blackcat_to_gcs.py @@ -1,24 +1,27 @@ -from calitp_data_infra.storage import get_fs, make_name_bq_safe -from airflow.models import BaseOperator -from pydantic import BaseModel +import gzip +import logging +import os from typing import Optional + import pandas as pd import pendulum import requests -import logging -import gzip -import os -import re +from calitp_data_infra.storage import get_fs, make_name_bq_safe +from pydantic import BaseModel + +from airflow.models import BaseOperator + def write_to_log(logfilename): - ''' + """ Creates a logger object that outputs to a log file, to the filename specified, and also streams to console. 
- ''' + """ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) - formatter = logging.Formatter(f'%(asctime)s:%(levelname)s: %(message)s', - datefmt='%y-%m-%d %H:%M:%S') + formatter = logging.Formatter( + "%(asctime)s:%(levelname)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S" + ) file_handler = logging.FileHandler(logfilename) file_handler.setFormatter(formatter) stream_handler = logging.StreamHandler() @@ -34,20 +37,20 @@ def write_to_log(logfilename): class BlackCatApiExtract(BaseModel): api_url: str form: str - api_tablename: str - bq_table_name: str + api_tablename_suffix: str + bq_table_name_suffix: str data: Optional[pd.DataFrame] logger: Optional[logging.Logger] extract_time: Optional[pendulum.DateTime] - logger = write_to_log('load_bc_apidata_output.log') + logger = write_to_log("load_bc_apidata_output.log") extract_time = pendulum.now() - + # pydantic doesn't know dataframe type # see https://stackoverflow.com/a/69200069 class Config: arbitrary_types_allowed = True - + def fetch_from_bc_api(self): """Download a BlackCat table as a DataFrame. @@ -65,19 +68,23 @@ def fetch_from_bc_api(self): 2. rename fields 3. apply column prefix (to columns not renamed by 1 or 2) """ - + self.logger.info( - f"Downloading BlackCat data for {self.extract_time.format('YYYY')}_{self.bq_table_name}." + f"Downloading BlackCat data for {self.extract_time.format('YYYY')}_{self.bq_table_name_suffix}." ) - response = requests.get(self.api_url, verify=False) - blob = response.json() - + # will automatically add the current year to the API url so that it ends with "/YYYY". 
+ url = self.api_url + self.extract_time.format("YYYY") + response = requests.get(url) + blob = response.json() + raw_df = pd.json_normalize(blob) - raw_df['ReportLastModifiedDate'] = raw_df['ReportLastModifiedDate'].astype('datetime64[ns]') + raw_df["ReportLastModifiedDate"] = raw_df["ReportLastModifiedDate"].astype( + "datetime64[ns]" + ) self.data = raw_df.rename(make_name_bq_safe, axis="columns") self.logger.info( - f"Downloaded {self.extract_time.format('YYYY')}_{self.bq_table_name} data with {len(self.data)} rows!" + f"Downloaded {self.bq_table_name_suffix} data for {self.extract_time.format('YYYY')} with {len(self.data)} rows!" ) def make_hive_path(self, form: str, bucket: str): @@ -85,22 +92,23 @@ def make_hive_path(self, form: str, bucket: str): raise ValueError( "An extract time must be set before a hive path can be generated." ) - bq_form_name = ( - str.lower(form).replace("-", "") - ) + bq_form_name = str.lower(form).replace("-", "") return os.path.join( bucket, - f"{bq_form_name}_{self.api_tablename}", + f"{bq_form_name}_{self.api_tablename_suffix}", + f"year={self.extract_time.format('YYYY')}", f"dt={self.extract_time.to_date_string()}", f"ts={self.extract_time.to_iso8601_string()}", - f"{bq_form_name}_{self.bq_table_name}.jsonl.gz", + f"{bq_form_name}_{self.bq_table_name_suffix}.jsonl.gz", ) def save_to_gcs(self, fs, bucket): hive_path = self.make_hive_path(self.form, bucket) self.logger.info(f"Uploading to GCS at {hive_path}") if len(self.data) == 0: - self.logger.info(f"There is no data for {self.api_tablename}, not saving anything. Pipeline exiting.") + self.logger.info( + f"There is no data for {self.api_tablename_suffix} for {self.extract_time.format('YYYY')}, not saving anything. Pipeline exiting." 
+ ) pass else: fs.pipe( @@ -118,30 +126,31 @@ def __init__( bucket, api_url, form, - api_tablename, - bq_table_name, + api_tablename_suffix, + bq_table_name_suffix, **kwargs, ): """An operator that downloads all data from a BlackCat API and saves it as one JSON file hive-partitioned by date in Google Cloud - Storage (GCS). Each org's data will be in 1 row, and for each separate table in the API, - a nested column will hold all of it's data. + Storage (GCS). Each org's data will be in 1 row, and for each separate table in the API, + a nested column will hold all of it's data. Args: bucket (str): GCS bucket where the scraped BlackCat report will be saved. - api_url (str): The URL to hit that gets the data. - api_tablename (str): The table that should be extracted from the BlackCat API. + api_url (str): The URL to hit that gets the data. This is dynamically appended with the current year, so that + ... in 2023 it will pull data from the ".../2023" url and in 2024, ".../2024" etc. + api_tablename_suffix (str): The table that should be extracted from the BlackCat API. MUST MATCH THE API JSON EXACTLY - bq_table_name (str): The table name that will be given in BigQuery. Appears in the GCS bucket path and the filename. - form: the NTD form that this report belongs to. E.g., RR-20, A-10, etc. + bq_table_name_suffix (str): The table name that will be given in BigQuery. Appears in the GCS bucket path and the filename. + form: the NTD form that this report belongs to. E.g., RR-20, A-10, etc. 
Since it's all forms, here it's "all" """ self.bucket = bucket # Instantiating an instance of the BlackCatApiExtract() self.extract = BlackCatApiExtract( api_url=api_url, form=form, - api_tablename=api_tablename, - bq_table_name=bq_table_name, + api_tablename_suffix=api_tablename_suffix, + bq_table_name_suffix=bq_table_name_suffix, ) super().__init__(**kwargs) From ac912393df87732ace8d24e7cfe09224ee063390 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Wed, 6 Dec 2023 22:02:12 -0800 Subject: [PATCH 12/15] schema fixes --- .../external_table_all_ntdreports.yml | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml b/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml index a6f9c41dd1..748cbe1279 100644 --- a/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml +++ b/airflow/dags/create_external_tables/ntd_report_validation/external_table_all_ntdreports.yml @@ -24,9 +24,10 @@ schema_fields: - name: reportstatus type: STRING - name: reportlastmodifieddate - type: TIMESTAMP + type: INTEGER - name: ntdreportingstationsandmaintenance_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -52,6 +53,7 @@ schema_fields: type: TIMESTAMP - name: ntdtransitassetmanagementa15_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -77,16 +79,16 @@ schema_fields: type: FLOAT - name: ConditionAssessment type: FLOAT - - name: ConditionAssessment + - name: ConditionAssessmentDate type: TIMESTAMP - name: SectionOfLargerFacility type: BOOLEAN - name: Latitude - type: FLOAT + type: STRING - name: LatitudeDirection type: STRING - name: Longitude - type: FLOAT + type: STRING - name: LongitudeDirection type: STRING - name: SecondaryMode @@ -95,6 +97,7 @@ schema_fields: type: STRING - name: ntdassetandresourceinfo_data type: RECORD + mode: REPEATED fields: - 
name: Id type: INTEGER @@ -138,6 +141,7 @@ schema_fields: type: TIMESTAMP - name: ntdreportingp10_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -172,6 +176,7 @@ schema_fields: type: TIMESTAMP - name: ntdreportingp20_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -191,6 +196,7 @@ schema_fields: type: TIMESTAMP - name: ntdreportingp50_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -230,6 +236,7 @@ schema_fields: type: TIMESTAMP - name: ntdreportinga35_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -248,7 +255,7 @@ schema_fields: - name: TotalVehicles type: STRING - name: UsefulLifeBenchmark - type: INTEGER + type: BOOLEAN - name: YearOfManufacture type: INTEGER - name: TransitAgencyCapitalResponsibility @@ -265,6 +272,7 @@ schema_fields: type: TIMESTAMP - name: ntdreportingrr20_intercity_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -290,6 +298,7 @@ schema_fields: type: TIMESTAMP - name: ntdreportingrr20_rural_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -325,6 +334,7 @@ schema_fields: type: TIMESTAMP - name: ntdreportingrr20_urban_tribal_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -344,6 +354,7 @@ schema_fields: type: TIMESTAMP - name: ntdreportingtamnarrative_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER @@ -367,6 +378,7 @@ schema_fields: type: TIMESTAMP - name: ss60_data type: RECORD + mode: REPEATED fields: - name: Id type: INTEGER From ac46ff1b07a4bd50b3bb419d2a0c76cd0e254903 Mon Sep 17 00:00:00 2001 From: Laurie Merrell Date: Thu, 7 Dec 2023 09:48:58 -0600 Subject: [PATCH 13/15] address linter issues --- .../ntd_report_from_blackcat/METADATA.yml | 2 +- .../dags/ntd_report_from_blackcat/README.md | 8 +- .../METADATA.yml | 2 +- .../ntd_report_publish_validation/README.md | 4 +- airflow/plugins/operators/__init__.py | 2 +- .../int_ntd_a30_voms_vins_totals.sql | 4 +- 
.../int_ntd_rr20_financial_fare_revenues.sql | 18 +- .../int_ntd_rr20_financial_specific_funds.sql | 14 +- .../int_ntd_rr20_financial_total_exp.sql | 2 +- .../int_ntd_rr20_service_alldata.sql | 11 +- .../int_ntd_rr20_service_ratios.py | 73 +++-- .../ntd_validation/int_ntd_validation.yml | 10 +- .../ntd_validation/fct_ntd_a30_vomscheck.sql | 6 +- .../fct_ntd_rr20_equal_totals_check.sql | 18 +- .../fct_ntd_rr20_funds_checks.sql | 74 ++--- .../fct_ntd_rr20_service_checks.py | 266 ++++++++++++------ .../ntd_validation/_src_api_externaltable.yml | 4 +- .../stg_ntd_2022_rr20_exp_by_mode.sql | 5 +- .../stg_ntd_2022_rr20_financial.sql | 5 +- .../stg_ntd_2022_rr20_service.sql | 5 +- .../ntd_validation/stg_ntd_2023_a10.sql | 6 +- .../stg_ntd_2023_a30_assetandresourceinfo.sql | 6 +- .../stg_ntd_2023_rr20_rural.sql | 6 +- .../stg_ntd_2023_rr20_urban_tribal.sql | 6 +- .../ntd_validation/stg_ntd_subrecipients.sql | 2 +- 25 files changed, 336 insertions(+), 223 deletions(-) diff --git a/airflow/dags/ntd_report_from_blackcat/METADATA.yml b/airflow/dags/ntd_report_from_blackcat/METADATA.yml index d8a59f4b6e..a6ee5e2843 100644 --- a/airflow/dags/ntd_report_from_blackcat/METADATA.yml +++ b/airflow/dags/ntd_report_from_blackcat/METADATA.yml @@ -15,4 +15,4 @@ default_args: pool: default_pool concurrency: 50 wait_for_defaults: - timeout: 3600 \ No newline at end of file + timeout: 3600 diff --git a/airflow/dags/ntd_report_from_blackcat/README.md b/airflow/dags/ntd_report_from_blackcat/README.md index 4189a8a187..18cd55f96d 100644 --- a/airflow/dags/ntd_report_from_blackcat/README.md +++ b/airflow/dags/ntd_report_from_blackcat/README.md @@ -2,8 +2,8 @@ Type: [Now|Scheduled](https://docs.calitp.org/data-infra/airflow/dags-maintenance.html) -This DAG orchestrates the publishing and storing of data, in the form of NTD report submissions, first pushing API data into Google Cloud Storage in the bucket `calitp-ntd-report-validation`. 
- -Another DAG (part of the `create_external_tables` existing DAG) reads the GCS data in BigQuery in the Cal-ITP data warehouse. The job will take the most recent file of each report type (which has all submitted reports by Caltrans 5311 subrecipients) and publish it into BigQuery `external` tables, if it is not yet there. This job uses the Cal-ITP existing infrastructure for creating external tables, outlined [here](https://docs.calitp.org/data-infra/architecture/data.html). +This DAG orchestrates the publishing and storing of data, in the form of NTD report submissions, first pushing API data into Google Cloud Storage in the bucket `calitp-ntd-report-validation`. -In the event of failure, the job can be rerun without backfilling. \ No newline at end of file +Another DAG (part of the `create_external_tables` existing DAG) reads the GCS data in BigQuery in the Cal-ITP data warehouse. The job will take the most recent file of each report type (which has all submitted reports by Caltrans 5311 subrecipients) and publish it into BigQuery `external` tables, if it is not yet there. This job uses the Cal-ITP existing infrastructure for creating external tables, outlined [here](https://docs.calitp.org/data-infra/architecture/data.html). + +In the event of failure, the job can be rerun without backfilling. 
diff --git a/airflow/dags/ntd_report_publish_validation/METADATA.yml b/airflow/dags/ntd_report_publish_validation/METADATA.yml index 0f97d22bff..f41c5cfe59 100644 --- a/airflow/dags/ntd_report_publish_validation/METADATA.yml +++ b/airflow/dags/ntd_report_publish_validation/METADATA.yml @@ -15,4 +15,4 @@ default_args: pool: default_pool concurrency: 50 wait_for_defaults: - timeout: 3600 \ No newline at end of file + timeout: 3600 diff --git a/airflow/dags/ntd_report_publish_validation/README.md b/airflow/dags/ntd_report_publish_validation/README.md index 28977df52d..33fda0cd15 100644 --- a/airflow/dags/ntd_report_publish_validation/README.md +++ b/airflow/dags/ntd_report_publish_validation/README.md @@ -2,6 +2,6 @@ Type: [Now|Scheduled](https://docs.calitp.org/data-infra/airflow/dags-maintenance.html) -This DAG orchestrates the publishing of NTD Report validation checks in the form of Excel files, that it saves into Google Cloud Storage. Checks conducted on submitted NTD report submissions, previously stored into BigQuery with dbt models. They are then converted to Excel files and saves in the Google Cloud Storage bucket `calitp-ntd-report-validation`. +This DAG orchestrates the publishing of NTD Report validation checks in the form of Excel files, that it saves into Google Cloud Storage. Checks conducted on submitted NTD report submissions, previously stored into BigQuery with dbt models. They are then converted to Excel files and saves in the Google Cloud Storage bucket `calitp-ntd-report-validation`. -In the event of failure, the job can be rerun without backfilling. \ No newline at end of file +In the event of failure, the job can be rerun without backfilling. 
diff --git a/airflow/plugins/operators/__init__.py b/airflow/plugins/operators/__init__.py index 39271c95c8..9ffc42acf9 100644 --- a/airflow/plugins/operators/__init__.py +++ b/airflow/plugins/operators/__init__.py @@ -1,9 +1,9 @@ # flake8: noqa from operators.airtable_to_gcs import AirtableToGCSOperator +from operators.blackcat_to_gcs import BlackCatApiToGCSOperator from operators.external_table import ExternalTable from operators.gtfs_csv_to_jsonl import GtfsGcsToJsonlOperator from operators.gtfs_csv_to_jsonl_hourly import GtfsGcsToJsonlOperatorHourly from operators.littlepay_raw_sync import LittlepayRawSync from operators.littlepay_to_jsonl import LittlepayToJSONL from operators.pod_operator import PodOperator -from operators.blackcat_to_gcs import BlackCatApiToGCSOperator diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql index 350fd6315c..a5df5e51a3 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_a30_voms_vins_totals.sql @@ -6,14 +6,14 @@ with voms_rr20 as ( select organization, fiscal_year, AVG(VOMX) as rr20_voms - FROM {{ ref('int_ntd_rr20_service_alldata') }} + FROM {{ ref('int_ntd_rr20_service_alldata') }} GROUP BY organization, fiscal_year ), vins_a30 as ( SELECT organization, api_report_period as fiscal_year, - COUNT (DISTINCT VIN) as a30_vin_n + COUNT(DISTINCT VIN) as a30_vin_n FROM {{ ref('stg_ntd_2023_a30_assetandresourceinfo') }} GROUP BY organization, fiscal_year ) diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql index d29fc24b6d..cece952604 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql +++ 
b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_fare_revenues.sql @@ -1,7 +1,7 @@ --- need fare rev and upt for each year. +-- need fare rev and upt for each year. WITH fare_rev_2023 as ( - select + select organization, api_report_period as fiscal_year, item as mode, @@ -10,7 +10,7 @@ WITH fare_rev_2023 as ( WHERE type = "Fare Revenues" ), upt_2023 as ( - select + select organization, api_report_period as fiscal_year, item as mode, @@ -19,8 +19,12 @@ upt_2023 as ( WHERE type = "Service Data" ), all_2023 as ( - select fare_rev_2023.*, upt_2023.Annual_UPT - FROM fare_rev_2023 + select fare_rev_2023.organization, + fare_rev_2023.fiscal_year, + fare_rev_2023.mode, + fare_rev_2023.Fare_Revenues, + upt_2023.Annual_UPT + FROM fare_rev_2023 FULL OUTER JOIN upt_2023 ON fare_rev_2023.organization = upt_2023.organization AND fare_rev_2023.mode = upt_2023.mode @@ -33,7 +37,7 @@ fare_rev_2022 as ( GROUP BY organization, fiscal_year ), upt_2022 as ( - select + select Organization_Legal_Name as organization, Fiscal_Year as fiscal_year, Mode as mode, @@ -43,7 +47,7 @@ from {{ ref('stg_ntd_2022_rr20_service') }} all_2022 as ( select fare_rev_2022.organization, fare_rev_2022.fiscal_year, upt_2022.Mode, fare_rev_2022.Fare_Revenues, upt_2022.Annual_UPT - FROM fare_rev_2022 + FROM fare_rev_2022 FULL OUTER JOIN upt_2022 ON fare_rev_2022.organization = upt_2022.organization ) diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_specific_funds.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_specific_funds.sql index c3af6aa0c9..768ca69c44 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_specific_funds.sql +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_specific_funds.sql @@ -1,7 +1,7 @@ ------- --- NTD validation errors about these 1 specific funding sources. +-- NTD validation errors about these 1 specific funding sources. 
--- ID #s RR20F-070, RR20F-065, RR20F-068, RR20F-066, RR20F-013. Sums the capital expenses across all funding sources ---- In 2022 the data is a different format than 2023 **and onwards**. +--- In 2022 the data is a different format than 2023 **and onwards**. --- Only needed for the 2023 error checking (to compare to "last year"). In 2024 you don't need 2022 data. ------- @@ -16,9 +16,9 @@ WITH longform_2023 AS ( 'Other Directly Generated Funds', 'Other_Directly_Generated_Funds'), 'Local Funds', 'Local_Funds') as item FROM {{ ref('stg_ntd_2023_rr20_rural') }} - WHERE item LIKE "%Directly Generated Funds%" OR - item LIKE "%Formula Grants for Rural Areas%" OR - item LIKE "Local Funds" + WHERE item LIKE "%Directly Generated Funds%" + OR item LIKE "%Formula Grants for Rural Areas%" + OR item LIKE "Local Funds" ), wide_2023 AS ( SELECT * FROM @@ -33,11 +33,11 @@ data_2022 AS ( SUM(FTA_Formula_Grants_for_Rural_Areas_5311) as FTA_Formula_Grants_for_Rural_Areas_5311_2022, Null as Local_Funds_2022 FROM {{ ref('stg_ntd_2022_rr20_financial') }} - GROUP BY 1,2 + GROUP BY 1,2 -- noqa: L054 ORDER BY organization ) -select wide_2023.organization, +select wide_2023.organization, wide_2023.FTA_Formula_Grants_for_Rural_Areas_5311 as FTA_Formula_Grants_for_Rural_Areas_5311_2023, wide_2023.Other_Directly_Generated_Funds as Other_Directly_Generated_Funds_2023, wide_2023.Local_Funds as Local_Funds_2023, diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_total_exp.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_total_exp.sql index 06211cc877..164a3c8403 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_total_exp.sql +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_financial_total_exp.sql @@ -37,7 +37,7 @@ total_cap_exp_byfunds_2023 as ( group by organization, api_report_period ) -SELECT +SELECT total_operations_exp_2023.*, total_capital_exp_bymode_2023.Total_Annual_Cap_Expenses_byMode, 
total_operations_rev_2023.Total_Annual_Op_Revenues_Expended, diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql index cd44a46c2f..20a14257ad 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql @@ -6,10 +6,10 @@ --- We are *assuming* that data in 2024 and onwards will be the same format as 2023 --- If you get errors in 2024, check which columns may differ and read errors carefully. ----TO DO: insert parameter for loop, for each year, do what 2023 is doing, +---TO DO: insert parameter for loop, for each year, do what 2023 is doing, --- and at the end, add another union statement with data_2023 as ( - select + select organization, api_report_period as fiscal_year, item as mode, @@ -17,7 +17,6 @@ with data_2023 as ( CASE WHEN description = "Operating Expenses" THEN operations_expended WHEN description = "Capital Expenses" THEN capital_expended - ELSE Null END as Total_Annual_Expenses_By_Mode, annual_vehicle_rev_miles as Annual_VRM, annual_vehicle_rev_hours as Annual_VRH, @@ -29,7 +28,7 @@ with data_2023 as ( ), service2022 as ( - select + select Organization_Legal_Name as organization, Fiscal_Year as fiscal_year, Mode as mode, @@ -42,7 +41,7 @@ service2022 as ( ), expenses2022 as ( - select + select Organization_Legal_Name as organization, Fiscal_Year as fiscal_year, Operating_Capital as operating_capital, @@ -63,7 +62,7 @@ all_2022 as ( service2022.Sponsored_UPT, service2022.VOMX from service2022 -FULL OUTER JOIN expenses2022 +FULL OUTER JOIN expenses2022 ON service2022.organization = expenses2022.organization AND service2022.fiscal_year = expenses2022.fiscal_year AND service2022.mode = expenses2022.mode diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py 
b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py index ae44deb762..a20e7e16d1 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py @@ -1,19 +1,21 @@ - -import pyspark.sql.functions as F -import pandas as pd import logging -import pyspark + +import pandas as pd # noqa: F401 +import pyspark # noqa: F401 +import pyspark.sql.functions as F # noqa: F401 def write_to_log(logfilename): - ''' + """ Creates a logger object that outputs to a log file, to the filename specified, and also streams to console. - ''' + """ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) - formatter = logging.Formatter(f'%(asctime)s:%(levelname)s: %(message)s', - datefmt='%y-%m-%d %H:%M:%S') + formatter = logging.Formatter( + f"%(asctime)s:%(levelname)s: %(message)s", # noqa: F541 + datefmt="%y-%m-%d %H:%M:%S", # noqa: F541 + ) file_handler = logging.FileHandler(logfilename) file_handler.setFormatter(formatter) stream_handler = logging.StreamHandler() @@ -25,6 +27,7 @@ def write_to_log(logfilename): return logger + def make_ratio_cols(df, numerator, denominator, col_name, logger, operation="sum"): if col_name is not None: # If a user specify a column name, use it @@ -32,40 +35,52 @@ def make_ratio_cols(df, numerator, denominator, col_name, logger, operation="sum if col_name in df.columns: logger.info(f"Dataframe already has column '{col_name}'") raise ValueError(f"Dataframe already has column '{col_name}'") - + else: _col_name = col_name - - if operation == "sum": - df = (df.groupby(['organization','mode', 'fiscal_year']) - .apply(lambda x: x.assign(**{_col_name: - lambda x: x[numerator].sum() / x[denominator]})) - ) + + if operation == "sum": + df = df.groupby(["organization", "mode", "fiscal_year"]).apply( + lambda x: x.assign( + **{_col_name: lambda x: x[numerator].sum() / x[denominator]} + ) + ) # else do not sum the numerator 
columns else: - df = (df.groupby(['organization','mode', 'fiscal_year']) - .apply(lambda x: x.assign(**{_col_name: - lambda x: x[numerator] / x[denominator]})) - ) + df = df.groupby(["organization", "mode", "fiscal_year"]).apply( + lambda x: x.assign(**{_col_name: lambda x: x[numerator] / x[denominator]}) + ) return df def model(dbt, session): # Set up the logger object - logger = write_to_log('rr20_servicechecks_log.log') - - #Load data from BigQuery - pass in the dbt model that we draw from. + logger = write_to_log("rr20_servicechecks_log.log") + + # Load data from BigQuery - pass in the dbt model that we draw from. allyears = dbt.ref("int_ntd_rr20_service_alldata") allyears = allyears.toPandas() # Calculate needed ratios, added as new columns - numeric_columns = allyears.select_dtypes(include=['number']).columns + numeric_columns = allyears.select_dtypes(include=["number"]).columns allyears[numeric_columns] = allyears[numeric_columns].fillna(0) - - allyears = make_ratio_cols(allyears, 'Total_Annual_Expenses_By_Mode', 'Annual_VRH', 'cost_per_hr', logger) - allyears = make_ratio_cols(allyears, 'Annual_VRM', 'VOMX', 'miles_per_veh', logger) - allyears = make_ratio_cols(allyears, 'Total_Annual_Expenses_By_Mode', 'Annual_UPT', 'fare_rev_per_trip', logger) - allyears = make_ratio_cols(allyears, 'Annual_VRM', 'Annual_VRH', 'rev_speed', logger, operation = "mean") - allyears = make_ratio_cols(allyears, 'Annual_UPT', 'Annual_VRH', 'trips_per_hr', logger, operation = "mean") + + allyears = make_ratio_cols( + allyears, "Total_Annual_Expenses_By_Mode", "Annual_VRH", "cost_per_hr", logger + ) + allyears = make_ratio_cols(allyears, "Annual_VRM", "VOMX", "miles_per_veh", logger) + allyears = make_ratio_cols( + allyears, + "Total_Annual_Expenses_By_Mode", + "Annual_UPT", + "fare_rev_per_trip", + logger, + ) + allyears = make_ratio_cols( + allyears, "Annual_VRM", "Annual_VRH", "rev_speed", logger, operation="mean" + ) + allyears = make_ratio_cols( + allyears, "Annual_UPT", 
"Annual_VRH", "trips_per_hr", logger, operation="mean" + ) return allyears diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml b/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml index 7ce803848c..aa8006667d 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_validation.yml @@ -2,18 +2,18 @@ version: 2 models: - name: int_ntd_rr20_financial_fare_revenues - description: | + description: | Setting up the RR-20 data for comparing fare revenues to previous year # tests: # - dbt_utils.expression_is_true: # expression: 'status != {{ guidelines_to_be_assessed_status() }}' # columns: - name: int_ntd_rr20_financial_specific_funds - description: | + description: | Setting up the RR-20 data for comparing specific funding sources - the 5311 funds, and Other directly generated funds For NTD validation error ID #s RR20F-070, RR20F-065, RR20F-068, RR20F-066, RR20F-013 - name: int_ntd_rr20_financial_total_exp - description: | + description: | Setting up the RR-20 data for comparing totals, for operating and capital expenses, reported in different ares of the RR-20 For NTD validation error ID #s RR20F-001OA, RR20F-001C, RR20F-182 - name: int_ntd_rr20_service_alldata @@ -21,9 +21,9 @@ models: Combines 2023 and 2022 data in preparation for doing NTD validation checks. The 2022 data was *not* from the API and so formatted differently We are *assuming* that data in 2024 and onwards will be the same format as 2023 - If you get errors in 2024, check which columns may differ and read errors carefully. + If you get errors in 2024, check which columns may differ and read errors carefully. 
- name: int_ntd_rr20_service_ratios description: | makes ratios for validation checks config: - materialized: table \ No newline at end of file + materialized: table diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql b/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql index 583a340914..8e26482b98 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql +++ b/warehouse/models/mart/ntd_validation/fct_ntd_a30_vomscheck.sql @@ -4,18 +4,18 @@ WITH rr20f_180 as ( SELECT organization, "RR20F-180: VOMS across forms" as name_of_check, - CASE WHEN ROUND(rr20_voms, 1) > ROUND(a30_vin_n, 1) + CASE WHEN ROUND(rr20_voms, 1) > ROUND(a30_vin_n, 1) THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN ROUND(rr20_voms, 1) > ROUND(a30_vin_n, 1) + CASE WHEN ROUND(rr20_voms, 1) > ROUND(a30_vin_n, 1) THEN "Total VOMS is greater than total A-30 vehicles reported. Please clarify." ELSE "VOMS & A-30 vehicles reported are equal to and/or lower than active inventory." END as description, CONCAT("RR-20 VOMS = ", CAST(ROUND(rr20_voms, 1) AS STRING), "# A-30 VINs = ", CAST(ROUND(a30_vin_n, 1) AS STRING)) AS value_checked, CURRENT_TIMESTAMP() AS date_checked - FROM {{ ref('int_ntd_a30_voms_vins_totals') }} + FROM {{ ref('int_ntd_a30_voms_vins_totals') }} ) SELECT * from rr20f_180 diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql index 7bc169dd25..ca9a74df74 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_equal_totals_check.sql @@ -1,35 +1,35 @@ --- We do identical CASE WHEN clauses in each CTE. 
The results determine 2 different column values but one can only specify 1 col/statement WITH rr20f_0010a as ( - select + select organization, "RR20F-001OA: equal totalsfor operating expenses" as name_of_check, - CASE WHEN (ROUND(Total_Annual_Op_Revenues_Expended,0) != ROUND(Total_Annual_Op_Expenses_by_Mode,0)) + CASE WHEN (ROUND(Total_Annual_Op_Revenues_Expended,0) != ROUND(Total_Annual_Op_Expenses_by_Mode,0)) THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN (ROUND(Total_Annual_Op_Revenues_Expended,0) != ROUND(Total_Annual_Op_Expenses_by_Mode,0)) + CASE WHEN (ROUND(Total_Annual_Op_Revenues_Expended,0) != ROUND(Total_Annual_Op_Expenses_by_Mode,0)) THEN "Total_Annual_Revenues_Expended should, but does not, equal Total_Annual_Expenses_by_Mode. Please provide a narrative justification." ELSE "" END as description, - CONCAT("Total_Annual_Revenues_Expended = $", CAST(ROUND(Total_Annual_Op_Revenues_Expended,0) AS STRING), + CONCAT("Total_Annual_Revenues_Expended = $", CAST(ROUND(Total_Annual_Op_Revenues_Expended,0) AS STRING), ",Total_Annual_Expenses_by_Mode = $", CAST(ROUND(Total_Annual_Op_Expenses_by_Mode,0) AS STRING)) as value_checked, CURRENT_TIMESTAMP() AS date_checked FROM {{ ref('int_ntd_rr20_financial_total_exp') }} -), +), rr20f_001c as( - select + select organization, "RR20F-001C: equal totals for capital expenses by mode and funding source expenditures" as name_of_check, - CASE WHEN (ROUND(Total_Annual_Cap_Expenses_byMode,0) != ROUND(Total_Annual_Cap_Expenses_byFunds,0)) + CASE WHEN (ROUND(Total_Annual_Cap_Expenses_byMode,0) != ROUND(Total_Annual_Cap_Expenses_byFunds,0)) THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN (ROUND(Total_Annual_Cap_Expenses_byMode,0) != ROUND(Total_Annual_Cap_Expenses_byFunds,0)) + CASE WHEN (ROUND(Total_Annual_Cap_Expenses_byMode,0) != ROUND(Total_Annual_Cap_Expenses_byFunds,0)) THEN "The sum of Total Expenses for all modes for Uses of Capital does not equal the sum of all values entered for Directly Generated, 
Non-Federal and Federal Government Funds for Uses of Capital. Please revise or explain." ELSE "" END as description, - CONCAT("Total_Annual_Cap_Expenses_byMode = $", CAST(ROUND(Total_Annual_Cap_Expenses_byMode,0) AS STRING), + CONCAT("Total_Annual_Cap_Expenses_byMode = $", CAST(ROUND(Total_Annual_Cap_Expenses_byMode,0) AS STRING), ",Total_Annual_Cap_Expenses_byFunds = $", CAST(ROUND(Total_Annual_Cap_Expenses_byFunds,0) AS STRING)) as value_checked, CURRENT_TIMESTAMP() AS date_checked FROM {{ ref('int_ntd_rr20_financial_total_exp') }} diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_funds_checks.sql b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_funds_checks.sql index 1e4c9ed6ca..72aac52e79 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_funds_checks.sql +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_funds_checks.sql @@ -1,38 +1,38 @@ --- We do identical CASE WHEN clauses in each CTE. The results determine 2 different column values but one can only specify 1 col/statement WITH rr20f_070 as ( - select + select organization, "RR20F-070: 5311 Funds not reported" as name_of_check, - CASE WHEN ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023) = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL + CASE WHEN ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023) = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023) = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL + CASE WHEN ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023) = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL THEN "The ยง5311 program is not listed as a revenue source in your report, Please double check and provide a narrative justification." 
ELSE "" END AS description, CONCAT("2023 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS STRING)) as value_checked, CURRENT_TIMESTAMP() AS date_checked - from {{ ref('int_ntd_rr20_financial_specific_funds') }} + from {{ ref('int_ntd_rr20_financial_specific_funds') }} ), rr20f_066 as ( - select + select organization, "RR20F-066: change from zero" as name_of_check, - CASE WHEN ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL) AND - (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL)) + CASE WHEN ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL) + AND (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL)) OR - ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) AND - (FTA_Formula_Grants_for_Rural_Areas_5311_2022 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NULL)) + ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) + AND (FTA_Formula_Grants_for_Rural_Areas_5311_2022 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NULL)) THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL) AND - (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL)) + CASE WHEN ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NULL) + AND (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL)) OR - ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) AND - 
(FTA_Formula_Grants_for_Rural_Areas_5311_2022 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NULL)) + ((FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) + AND (FTA_Formula_Grants_for_Rural_Areas_5311_2022 = 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NULL)) THEN "FTA_Formula_Grants_for_Rural_Areas_5311 funding changed either from or to zero compared to last year. Please provide a narrative justification." ELSE "" END AS description, @@ -42,78 +42,78 @@ rr20f_066 as ( from {{ ref('int_ntd_rr20_financial_specific_funds') }} ), rr20f_065 as ( - select + select organization, "RR20F-065: 5311 Funds same value" as name_of_check, - CASE WHEN (FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) AND - (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL) AND - (FTA_Formula_Grants_for_Rural_Areas_5311_2023 = FTA_Formula_Grants_for_Rural_Areas_5311_2022) + CASE WHEN (FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) + AND (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL) + AND (FTA_Formula_Grants_for_Rural_Areas_5311_2023 = FTA_Formula_Grants_for_Rural_Areas_5311_2022) THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN (FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) AND - (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL) AND - (FTA_Formula_Grants_for_Rural_Areas_5311_2023 = FTA_Formula_Grants_for_Rural_Areas_5311_2022) + CASE WHEN (FTA_Formula_Grants_for_Rural_Areas_5311_2023 != 0 OR FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL) + AND (FTA_Formula_Grants_for_Rural_Areas_5311_2022 != 0 OR 
FTA_Formula_Grants_for_Rural_Areas_5311_2022 IS NOT NULL) + AND (FTA_Formula_Grants_for_Rural_Areas_5311_2023 = FTA_Formula_Grants_for_Rural_Areas_5311_2022) THEN "You have identical values for FTA_Formula_Grants_for_Rural_Areas_5311 funding in 2022 and 2023, which is unusual. Please provide a narrative justification." ELSE "" END AS description, CONCAT("2022 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2022,0) AS STRING), "2023 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS STRING)) as value_checked, CURRENT_TIMESTAMP() AS date_checked - from {{ ref('int_ntd_rr20_financial_specific_funds') }} + from {{ ref('int_ntd_rr20_financial_specific_funds') }} ), rr20f_013 as ( - select + select organization, "RR20F-013: Other Directly Generated Funds same value" as name_of_check, - CASE WHEN (Other_Directly_Generated_Funds_2023 != 0 OR Other_Directly_Generated_Funds_2023 IS NOT NULL) AND - (Other_Directly_Generated_Funds_2022 != 0 OR Other_Directly_Generated_Funds_2022 IS NOT NULL) AND - (Other_Directly_Generated_Funds_2023 = Other_Directly_Generated_Funds_2022) + CASE WHEN (Other_Directly_Generated_Funds_2023 != 0 OR Other_Directly_Generated_Funds_2023 IS NOT NULL) + AND (Other_Directly_Generated_Funds_2022 != 0 OR Other_Directly_Generated_Funds_2022 IS NOT NULL) + AND (Other_Directly_Generated_Funds_2023 = Other_Directly_Generated_Funds_2022) THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN (Other_Directly_Generated_Funds_2023 != 0 OR Other_Directly_Generated_Funds_2023 IS NOT NULL) AND - (Other_Directly_Generated_Funds_2022 != 0 OR Other_Directly_Generated_Funds_2022 IS NOT NULL) AND - (Other_Directly_Generated_Funds_2023 = Other_Directly_Generated_Funds_2022) + CASE WHEN (Other_Directly_Generated_Funds_2023 != 0 OR Other_Directly_Generated_Funds_2023 IS NOT NULL) + AND (Other_Directly_Generated_Funds_2022 != 0 OR Other_Directly_Generated_Funds_2022 IS NOT NULL) + AND (Other_Directly_Generated_Funds_2023 = 
Other_Directly_Generated_Funds_2022) THEN "You have identical values for Other_Directly_Generated_Funds funding in 2022 and 2023, which is unusual. Please provide a narrative justification." ELSE "" END AS description, CONCAT("2022 = ", CAST(ROUND(Other_Directly_Generated_Funds_2022,0) AS STRING), "2023 = ", CAST(ROUND(Other_Directly_Generated_Funds_2023,0) AS STRING)) as value_checked, CURRENT_TIMESTAMP() AS date_checked - from {{ ref('int_ntd_rr20_financial_specific_funds') }} + from {{ ref('int_ntd_rr20_financial_specific_funds') }} ), rr20f_068 as ( - select + select organization, "RR20F-068: 5311 Funds rounded to thousand" as name_of_check, - CASE WHEN MOD(CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS INT),1000) = 0 AND FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL + CASE WHEN MOD(CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS INT),1000) = 0 AND FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN MOD(CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS INT),1000) = 0 AND FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL + CASE WHEN MOD(CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS INT),1000) = 0 AND FTA_Formula_Grants_for_Rural_Areas_5311_2023 IS NOT NULL THEN "FTA_Formula_Grants_for_Rural_Areas_5311 are rounded to the nearest thousand, but should be reported as exact values. Please double check and provide a narrative justification." 
ELSE "" END AS description, CONCAT("2023 = ", CAST(ROUND(FTA_Formula_Grants_for_Rural_Areas_5311_2023,0) AS STRING)) as value_checked, CURRENT_TIMESTAMP() AS date_checked - from {{ ref('int_ntd_rr20_financial_specific_funds') }} + from {{ ref('int_ntd_rr20_financial_specific_funds') }} ), rr20f_024 as ( - select + select organization, "RR20F-024: Local Funds rounded to thousand" as name_of_check, - CASE WHEN MOD(CAST(ROUND(Local_Funds_2023) AS INT),1000) = 0 AND Local_Funds_2023 IS NOT NULL + CASE WHEN MOD(CAST(ROUND(Local_Funds_2023) AS INT),1000) = 0 AND Local_Funds_2023 IS NOT NULL THEN "Fail" ELSE "Pass" END as check_status, - CASE WHEN MOD(CAST(ROUND(Local_Funds_2023) AS INT),1000) = 0 AND Local_Funds_2023 IS NOT NULL + CASE WHEN MOD(CAST(ROUND(Local_Funds_2023) AS INT),1000) = 0 AND Local_Funds_2023 IS NOT NULL THEN "Local Funds are rounded to the nearest thousand, but should be reported as exact values. Please double check and provide a narrative justification." ELSE "" END AS description, CONCAT("2023 = ", CAST(ROUND(Local_Funds_2023) AS STRING)) as value_checked, CURRENT_TIMESTAMP() AS date_checked - from {{ ref('int_ntd_rr20_financial_specific_funds') }} + from {{ ref('int_ntd_rr20_financial_specific_funds') }} ) SELECT * FROM rr20f_070 diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py index 750fffb6f9..13d47fe797 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py @@ -1,18 +1,22 @@ -import pandas as pd import datetime import logging -##### TO_DO: see if the missing data check can still work or did we already fill it with zeros +import pandas as pd + +# TO_DO: see if the missing data check can still work or did we already fill it with zeros + def write_to_log(logfilename): - ''' + """ Creates a logger object that outputs to a log file, to the filename 
specified, and also streams to console. - ''' + """ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) - formatter = logging.Formatter(f'%(asctime)s:%(levelname)s: %(message)s', - datefmt='%y-%m-%d %H:%M:%S') + formatter = logging.Formatter( + f"%(asctime)s:%(levelname)s: %(message)s", # noqa: F541 + datefmt="%y-%m-%d %H:%M:%S", # noqa: F541 + ) file_handler = logging.FileHandler(logfilename) file_handler.setFormatter(formatter) stream_handler = logging.StreamHandler() @@ -26,52 +30,75 @@ def write_to_log(logfilename): def check_rr20_ratios(df, variable, threshold, this_year, last_year, logger): - '''Validation checks where a ratio must be within a certain threshold limit - compared to the previous year.''' - agencies = df['organization'].unique() + """Validation checks where a ratio must be within a certain threshold limit + compared to the previous year.""" + agencies = df["organization"].unique() output = [] for agency in agencies: - agency_df = df[df['organization']==agency] + agency_df = df[df["organization"] == agency] logger.info(f"Checking {agency} for {variable} info.") if len(agency_df) > 0: - # Check whether data for both years is present - if (len(agency_df[agency_df['fiscal_year']==this_year]) > 0) \ - & (len(agency_df[agency_df['fiscal_year']==last_year]) > 0): - - for mode in agency_df[(agency_df['fiscal_year']==this_year)]['mode'].unique(): - value_thisyr = (round(agency_df[(agency_df['mode']==mode) - & (agency_df['fiscal_year'] == this_year)] - [variable].unique()[0], 2)) - if len(agency_df[(agency_df['mode']==mode) & (agency_df['fiscal_year'] == last_year)][variable]) == 0: + if (len(agency_df[agency_df["fiscal_year"] == this_year]) > 0) & ( + len(agency_df[agency_df["fiscal_year"] == last_year]) > 0 + ): + for mode in agency_df[(agency_df["fiscal_year"] == this_year)][ + "mode" + ].unique(): + value_thisyr = round( + agency_df[ + (agency_df["mode"] == mode) + & (agency_df["fiscal_year"] == this_year) + ][variable].unique()[0], + 
2, + ) + if ( + len( + agency_df[ + (agency_df["mode"] == mode) + & (agency_df["fiscal_year"] == last_year) + ][variable] + ) + == 0 + ): value_lastyr = 0 else: - value_lastyr = (round(agency_df[(agency_df['mode']==mode) - & (agency_df['fiscal_year'] == last_year)] - [variable].unique()[0], 2)) - - if (value_lastyr == 0) and (abs(value_thisyr - value_lastyr) >= threshold): + value_lastyr = round( + agency_df[ + (agency_df["mode"] == mode) + & (agency_df["fiscal_year"] == last_year) + ][variable].unique()[0], + 2, + ) + + if (value_lastyr == 0) and ( + abs(value_thisyr - value_lastyr) >= threshold + ): result = "fail" check_name = f"{variable}" mode = mode - description = (f"The {variable} for {mode} has changed from last year by > = {threshold*100}%, please provide a narrative justification.") - elif (value_lastyr != 0) and abs((value_lastyr - value_thisyr)/value_lastyr) >= threshold: + description = f"The {variable} for {mode} has changed from last year by > = {threshold*100}%, please provide a narrative justification." + elif (value_lastyr != 0) and abs( + (value_lastyr - value_thisyr) / value_lastyr + ) >= threshold: result = "fail" check_name = f"{variable}" mode = mode - description = (f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr)/value_lastyr)*100, 1)}%, please provide a narrative justification.") + description = f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr)/value_lastyr)*100, 1)}%, please provide a narrative justification." 
else: result = "pass" check_name = f"{variable}" mode = mode description = "" - output_line = {"Organization": agency, - "name_of_check" : check_name, - "mode": mode, - "value_checked": f"{this_year} = {value_thisyr}, {last_year} = {value_lastyr}", - "check_status": result, - "Description": description} + output_line = { + "Organization": agency, + "name_of_check": check_name, + "mode": mode, + "value_checked": f"{this_year} = {value_thisyr}, {last_year} = {value_lastyr}", + "check_status": result, + "Description": description, + } output.append(output_line) else: logger.info(f"There is no data for {agency}") @@ -79,70 +106,115 @@ def check_rr20_ratios(df, variable, threshold, this_year, last_year, logger): return checks -def check_single_number(df, variable, this_year, last_year, logger, threshold=None,): - '''Validation checks where a single number must be within a certain threshold limit - compared to the previous year.''' - agencies = df['organization'].unique() +def check_single_number( + df, + variable, + this_year, + last_year, + logger, + threshold=None, +): + """Validation checks where a single number must be within a certain threshold limit + compared to the previous year.""" + agencies = df["organization"].unique() output = [] for agency in agencies: - - if len(df[df['organization']==agency]) > 0: + if len(df[df["organization"] == agency]) > 0: logger.info(f"Checking {agency} for {variable} info.") # Check whether data for both years is present, if so perform prior yr comparison. 
- if (len(df[(df['organization']==agency) & (df['fiscal_year']==this_year)]) > 0) \ - & (len(df[(df['organization']==agency) & (df['fiscal_year']==last_year)]) > 0): - - for mode in df[(df['organization'] == agency) & (df['fiscal_year']==this_year)]['mode'].unique(): - value_thisyr = (round(df[(df['organization'] == agency) - & (df['mode']==mode) - & (df['fiscal_year'] == this_year)] - [variable].unique()[0], 2)) + if ( + len( + df[ + (df["organization"] == agency) + & (df["fiscal_year"] == this_year) + ] + ) + > 0 + ) & ( + len( + df[ + (df["organization"] == agency) + & (df["fiscal_year"] == last_year) + ] + ) + > 0 + ): + for mode in df[ + (df["organization"] == agency) & (df["fiscal_year"] == this_year) + ]["mode"].unique(): + value_thisyr = round( + df[ + (df["organization"] == agency) + & (df["mode"] == mode) + & (df["fiscal_year"] == this_year) + ][variable].unique()[0], + 2, + ) # If there's no data for last yr: - if len(df[(df['organization'] == agency) - & (df['mode']==mode) - & (df['fiscal_year'] == last_year)][variable]) == 0: + if ( + len( + df[ + (df["organization"] == agency) + & (df["mode"] == mode) + & (df["fiscal_year"] == last_year) + ][variable] + ) + == 0 + ): value_lastyr = 0 else: - value_lastyr = (round(df[(df['organization'] == agency) - & (df['mode']==mode) - & (df['fiscal_year'] == last_year)] - [variable].unique()[0], 2)) - - if (round(value_thisyr)==0 and round(value_lastyr) != 0) | (round(value_thisyr)!=0 and round(value_lastyr) == 0): + value_lastyr = round( + df[ + (df["organization"] == agency) + & (df["mode"] == mode) + & (df["fiscal_year"] == last_year) + ][variable].unique()[0], + 2, + ) + + if (round(value_thisyr) == 0 and round(value_lastyr) != 0) | ( + round(value_thisyr) != 0 and round(value_lastyr) == 0 + ): result = "fail" check_name = f"{variable}" mode = mode - description = (f"The {variable} for {mode} has changed either from or to zero compared to last year. 
Please provide a narrative justification.") + description = f"The {variable} for {mode} has changed either from or to zero compared to last year. Please provide a narrative justification." # run only the above check on whether something changed from zero to non-zero, if no threshold is given - elif threshold==None: + elif threshold is None: result = "pass" check_name = f"{variable}" mode = mode description = "" pass # also check for pct change, if a threshold parameter is passed into function - elif (value_lastyr == 0) and (abs(value_thisyr - value_lastyr) >= threshold): + elif (value_lastyr == 0) and ( + abs(value_thisyr - value_lastyr) >= threshold + ): result = "fail" check_name = f"{variable}" mode = mode - description = (f"The {variable} for {mode} was 0 last year and has changed by > = {threshold*100}%, please provide a narrative justification.") - elif (value_lastyr != 0) and abs((value_lastyr - value_thisyr)/value_lastyr) >= threshold: + description = f"The {variable} for {mode} was 0 last year and has changed by > = {threshold*100}%, please provide a narrative justification." + elif (value_lastyr != 0) and abs( + (value_lastyr - value_thisyr) / value_lastyr + ) >= threshold: result = "fail" check_name = f"{variable}" mode = mode - description = (f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr)/value_lastyr)*100, 1)}%; please provide a narrative justification.") + description = f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr)/value_lastyr)*100, 1)}%; please provide a narrative justification." 
else: result = "pass" check_name = f"{variable}" mode = mode description = "" - output_line = {"Organization": agency, - "name_of_check" : check_name, - "mode": mode, - "value_checked": f"{this_year} = {value_thisyr}, {last_year} = {value_lastyr}", - "check_status": result, - "Description": description} + output_line = { + "Organization": agency, + "name_of_check": check_name, + "mode": mode, + "value_checked": f"{this_year} = {value_thisyr}, {last_year} = {value_lastyr}", + "check_status": result, + "Description": description, + } output.append(output_line) else: logger.info(f"There is no data for {agency}") @@ -152,34 +224,54 @@ def check_single_number(df, variable, this_year, last_year, logger, threshold=No def model(dbt, session): # Set up the logger object - logger = write_to_log('rr20_ftc_servicechecks_log.log') + logger = write_to_log("rr20_ftc_servicechecks_log.log") - this_year=datetime.datetime.now().year - last_year = this_year-1 - this_date=datetime.datetime.now().date().strftime('%Y-%m-%d') #for suffix on Excel files + this_year = datetime.datetime.now().year + last_year = this_year - 1 + this_date = ( + datetime.datetime.now().date().strftime("%Y-%m-%d") + ) # for suffix on Excel files - #Load data from BigQuery - pass in the dbt model that we draw from. + # Load data from BigQuery - pass in the dbt model that we draw from. 
allyears = dbt.ref("int_ntd_rr20_service_ratios") allyears = allyears.toPandas() # Run validation checks - cph_checks = check_rr20_ratios(allyears, 'cost_per_hr', .30, this_year, last_year, logger) - mpv_checks = check_rr20_ratios(allyears, 'miles_per_veh', .20, this_year, last_year, logger) - vrm_checks = check_single_number(allyears, 'Annual_VRM', this_year, last_year, logger, threshold=.30) - frpt_checks = check_rr20_ratios(allyears, 'fare_rev_per_trip', .25, this_year, last_year, logger) - rev_speed_checks = check_rr20_ratios(allyears, 'rev_speed', .15, this_year, last_year, logger) - tph_checks = check_rr20_ratios(allyears, 'trips_per_hr', .30, this_year, last_year, logger) - voms0_check = check_single_number(allyears, 'VOMX', this_year, last_year, logger) + cph_checks = check_rr20_ratios( + allyears, "cost_per_hr", 0.30, this_year, last_year, logger + ) + mpv_checks = check_rr20_ratios( + allyears, "miles_per_veh", 0.20, this_year, last_year, logger + ) + vrm_checks = check_single_number( + allyears, "Annual_VRM", this_year, last_year, logger, threshold=0.30 + ) + frpt_checks = check_rr20_ratios( + allyears, "fare_rev_per_trip", 0.25, this_year, last_year, logger + ) + rev_speed_checks = check_rr20_ratios( + allyears, "rev_speed", 0.15, this_year, last_year, logger + ) + tph_checks = check_rr20_ratios( + allyears, "trips_per_hr", 0.30, this_year, last_year, logger + ) + voms0_check = check_single_number(allyears, "VOMX", this_year, last_year, logger) # Combine checks into one table - rr20_checks = pd.concat([cph_checks, mpv_checks, vrm_checks, - frpt_checks, rev_speed_checks, - tph_checks, voms0_check], - ignore_index=True).sort_values(by="Organization") + rr20_checks = pd.concat( + [ + cph_checks, + mpv_checks, + vrm_checks, + frpt_checks, + rev_speed_checks, + tph_checks, + voms0_check, + ], + ignore_index=True, + ).sort_values(by="Organization") logger.info(f"RR-20 service data checks conducted on {this_date} is complete!") - ## Part 2: send table to 
BigQuery + # Part 2: send table to BigQuery return rr20_checks - - diff --git a/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml b/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml index 1dc10d2e19..346169b267 100644 --- a/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml +++ b/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml @@ -3,8 +3,8 @@ version: 2 sources: - name: ntd_report_validation description: | - Data from BlackCat API. Each org's data is be in 1 row, and for each separate table in the API, - a nested column holds all of it's data. + Data from BlackCat API. Each org's data is be in 1 row, and for each separate table in the API, + a nested column holds all of it's data. database: "{{ env_var('DBT_SOURCE_DATABASE', var('SOURCE_DATABASE')) }}" schema: external_blackcat tables: diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql index c2fbca6ae8..2c6bcd5a38 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_exp_by_mode.sql @@ -1,5 +1,6 @@ --- One-time data ingest of 2022 data, whose pattern which will not be repeated in the future --- We pull these tables in to use them in later int and fct models -SELECT - * +-- TODO: enumerate columns +SELECT -- noqa: AM04 + * FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_expenses_by_mode` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql index 5f465071db..6afc02d872 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_financial.sql @@ -1,5 +1,6 @@ --- One-time data ingest of 2022 data, whose pattern which will not be repeated in the future --- 
We pull these tables in to use them in later int and fct models -SELECT - * +-- TODO: enumerate columns +SELECT -- noqa: AM04 + * FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_financials__2` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql index 64c17c9b43..770028f71c 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2022_rr20_service.sql @@ -1,5 +1,6 @@ --- One-time data ingest of 2022 data, whose pattern which will not be repeated in the future --- We pull these tables in to use them in later int and fct models -SELECT - * +-- TODO: enumerate columns +SELECT -- noqa: AM04 + * FROM `cal-itp-data-infra.blackcat_raw.2022_rr20_service_data` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql index 6ecc277392..7f84f3af60 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql @@ -1,4 +1,4 @@ -SELECT +SELECT organization, reportstatus as api_report_status, TIMESTAMP_MILLIS(reportlastmodifieddate) as api_report_last_modified_date, @@ -14,5 +14,5 @@ SELECT a10.DOLeasedByPublicAgency as do_leased_by_public_agency, a10.DOLeasedFromPrivateEntity as do_leased_from_private_entity, a10.LastModifiedDate as last_modified_date -FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} -, UNNEST (`ntdreportingstationsandmaintenance_data`) as `a10` +FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }}, + UNNEST(`ntdreportingstationsandmaintenance_data`) as `a10` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql index 223c2e104a..5925b966e9 100644 --- 
a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql @@ -1,4 +1,4 @@ -SELECT +SELECT organization, reportstatus as api_report_status, TIMESTAMP_MILLIS(reportlastmodifieddate) as api_report_last_modified_date, @@ -23,5 +23,5 @@ SELECT a30.ModesOperatedDisplayText as modes_operated_display_text, a30.ModesOperatedFullText as modes_operated_full_text, a30.LastModifiedDate as last_modified_date -FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} -, UNNEST (`ntdassetandresourceinfo_data`) as `a30` +FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }}, + UNNEST(`ntdassetandresourceinfo_data`) as `a30` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql index aeaec900d7..1a49ff9b3a 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql @@ -1,4 +1,4 @@ -SELECT +SELECT organization, reportstatus as api_report_status, TIMESTAMP_MILLIS(reportlastmodifieddate) as api_report_last_modified_date, @@ -19,5 +19,5 @@ SELECT ntdreportingrr20_rural_data.SponsoredServiceUPT as sponsored_service_upt, ntdreportingrr20_rural_data.Quantity as quantity, ntdreportingrr20_rural_data.LastModifiedDate as last_modified_date -FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} -, UNNEST (`ntdreportingrr20_rural_data`) as `ntdreportingrr20_rural_data` +FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }}, + UNNEST(`ntdreportingrr20_rural_data`) as `ntdreportingrr20_rural_data` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql index 003c00b758..224860c383 100644 --- 
a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql @@ -1,4 +1,4 @@ -SELECT +SELECT organization, reportstatus as api_report_status, TIMESTAMP_MILLIS(reportlastmodifieddate) as api_report_last_modified_date, @@ -11,6 +11,6 @@ SELECT ntdreportingrr20_urban_tribal_data.CapitalExpended as capital_expended, ntdreportingrr20_urban_tribal_data.Description as description, ntdreportingrr20_urban_tribal_data.LastModifiedDate as last_modified_date -FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }} +FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }}, -- `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` -, UNNEST (`ntdreportingrr20_urban_tribal_data`) as `ntdreportingrr20_urban_tribal_data` + UNNEST(`ntdreportingrr20_urban_tribal_data`) as `ntdreportingrr20_urban_tribal_data` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql b/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql index b935ffbd64..adde256f4d 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql @@ -1,3 +1,3 @@ SELECT - Organization as organization + Organization as organization FROM blackcat_raw.2023_organizations From 7eed7353b04ada31f4c0322585f9033dbf975541 Mon Sep 17 00:00:00 2001 From: Laurie Merrell Date: Thu, 7 Dec 2023 09:57:51 -0600 Subject: [PATCH 14/15] more linter --- .../ntd_validation/int_ntd_rr20_service_ratios.py | 4 ++-- .../ntd_validation/fct_ntd_rr20_service_checks.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py index a20e7e16d1..b4265d74d6 100644 --- 
a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py @@ -13,8 +13,8 @@ def write_to_log(logfilename): logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) formatter = logging.Formatter( - f"%(asctime)s:%(levelname)s: %(message)s", # noqa: F541 - datefmt="%y-%m-%d %H:%M:%S", # noqa: F541 + f"%(asctime)s:%(levelname)s: %(message)s", # noqa: F541, E231 + datefmt="%y-%m-%d %H:%M:%S", # noqa: F541, E231 ) file_handler = logging.FileHandler(logfilename) file_handler.setFormatter(formatter) diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py index 13d47fe797..3a92a04e4c 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py @@ -14,8 +14,8 @@ def write_to_log(logfilename): logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) formatter = logging.Formatter( - f"%(asctime)s:%(levelname)s: %(message)s", # noqa: F541 - datefmt="%y-%m-%d %H:%M:%S", # noqa: F541 + f"%(asctime)s:%(levelname)s: %(message)s", # noqa: F541, E231 + datefmt="%y-%m-%d %H:%M:%S", # noqa: F541, E231 ) file_handler = logging.FileHandler(logfilename) file_handler.setFormatter(formatter) @@ -77,14 +77,14 @@ def check_rr20_ratios(df, variable, threshold, this_year, last_year, logger): result = "fail" check_name = f"{variable}" mode = mode - description = f"The {variable} for {mode} has changed from last year by > = {threshold*100}%, please provide a narrative justification." + description = f"The {variable} for {mode} has changed from last year by > = {threshold * 100}%, please provide a narrative justification." 
elif (value_lastyr != 0) and abs( (value_lastyr - value_thisyr) / value_lastyr ) >= threshold: result = "fail" check_name = f"{variable}" mode = mode - description = f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr)/value_lastyr)*100, 1)}%, please provide a narrative justification." + description = f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr) / value_lastyr) * 100, 1)}%, please provide a narrative justification." else: result = "pass" check_name = f"{variable}" @@ -193,14 +193,14 @@ def check_single_number( result = "fail" check_name = f"{variable}" mode = mode - description = f"The {variable} for {mode} was 0 last year and has changed by > = {threshold*100}%, please provide a narrative justification." + description = f"The {variable} for {mode} was 0 last year and has changed by > = {threshold * 100}%, please provide a narrative justification." elif (value_lastyr != 0) and abs( (value_lastyr - value_thisyr) / value_lastyr ) >= threshold: result = "fail" check_name = f"{variable}" mode = mode - description = f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr)/value_lastyr)*100, 1)}%; please provide a narrative justification." + description = f"The {variable} for {mode} has changed from last year by {round(abs((value_lastyr - value_thisyr) / value_lastyr) * 100, 1)}%; please provide a narrative justification." 
# noqa: E702 else: result = "pass" check_name = f"{variable}" From 91e7fad7b0242e1b8121df030d48ef3005f4fde5 Mon Sep 17 00:00:00 2001 From: Kim Engie Date: Thu, 7 Dec 2023 08:51:27 -0800 Subject: [PATCH 15/15] fix conflicts, final ratios corrections --- .../int_ntd_rr20_service_alldata.sql | 72 ++++++++++++--- .../int_ntd_rr20_service_ratios.py | 91 ++++++++++--------- .../fct_ntd_rr20_service_checks.py | 2 +- .../ntd_validation/_src_api_externaltable.yml | 2 +- .../ntd_validation/stg_ntd_2023_a10.sql | 4 +- .../stg_ntd_2023_a30_assetandresourceinfo.sql | 4 +- .../stg_ntd_2023_rr20_rural.sql | 4 +- .../stg_ntd_2023_rr20_urban_tribal.sql | 4 +- .../ntd_validation/stg_ntd_subrecipients.sql | 2 +- 9 files changed, 120 insertions(+), 65 deletions(-) diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql index 20a14257ad..0b40f6487c 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_alldata.sql @@ -8,25 +8,62 @@ ---TO DO: insert parameter for loop, for each year, do what 2023 is doing, --- and at the end, add another union statement -with data_2023 as ( +with service_2023 as ( select organization, api_report_period as fiscal_year, item as mode, - description as operating_capital, - CASE - WHEN description = "Operating Expenses" THEN operations_expended - WHEN description = "Capital Expenses" THEN capital_expended - END as Total_Annual_Expenses_By_Mode, annual_vehicle_rev_miles as Annual_VRM, annual_vehicle_rev_hours as Annual_VRH, annual_unlinked_pass_trips as Annual_UPT, sponsored_service_upt as Sponsored_UPT, annual_vehicle_max_service as VOMX from {{ ref('stg_ntd_2023_rr20_rural') }} + WHERE type = "Service Data" +), + +expenses_2023 as ( + select + organization, + api_report_period as fiscal_year, + item as mode, + operations_expended as 
Total_Annual_Expenses_By_Mode + from {{ ref('stg_ntd_2023_rr20_rural') }} WHERE type = "Expenses by Mode" ), +fare_rev_2023 as ( + select + organization, + api_report_period as fiscal_year, + sum(operations_expended) as Fare_Revenues + from {{ ref('stg_ntd_2023_rr20_rural') }} + WHERE type = "Fare Revenues" + GROUP BY organization, fiscal_year +), + +all_2023 as ( + SELECT DISTINCT + service_2023.organization, + service_2023.fiscal_year, + service_2023.mode, + expenses_2023.Total_Annual_Expenses_By_Mode, + service_2023.Annual_VRM, + service_2023.Annual_VRH, + service_2023.Annual_UPT, + service_2023.Sponsored_UPT, + service_2023.VOMX, + fare_rev_2023.Fare_Revenues + FROM service_2023 + FULL OUTER JOIN expenses_2023 + ON service_2023.organization = expenses_2023.organization + AND service_2023.fiscal_year = expenses_2023.fiscal_year + AND service_2023.mode = expenses_2023.mode + FULL OUTER JOIN fare_rev_2023 + ON service_2023.organization = fare_rev_2023.organization + AND service_2023.fiscal_year = fare_rev_2023.fiscal_year +), + service2022 as ( select Organization_Legal_Name as organization, @@ -44,32 +81,45 @@ expenses2022 as ( select Organization_Legal_Name as organization, Fiscal_Year as fiscal_year, - Operating_Capital as operating_capital, Mode as mode, Total_Annual_Expenses_By_Mode FROM {{ ref('stg_ntd_2022_rr20_exp_by_mode') }} + WHERE Operating_Capital = "Operating" +), + +fare_rev_2022 as ( + select + Organization_Legal_Name as organization, + Fiscal_Year as fiscal_year, + Fare_Revenues + FROM {{ ref('stg_ntd_2022_rr20_financial') }} + WHERE Operating_Capital = "Operating" ), all_2022 as ( - select service2022.organization, + SELECT DISTINCT + service2022.organization, service2022.fiscal_year, service2022.mode, - expenses2022.operating_capital, expenses2022.Total_Annual_Expenses_By_Mode, service2022.Annual_VRM, service2022.Annual_VRH, service2022.Annual_UPT, service2022.Sponsored_UPT, - service2022.VOMX + service2022.VOMX, + fare_rev_2022.Fare_Revenues 
from service2022 FULL OUTER JOIN expenses2022 ON service2022.organization = expenses2022.organization AND service2022.fiscal_year = expenses2022.fiscal_year AND service2022.mode = expenses2022.mode +INNER JOIN fare_rev_2022 + ON service2022.organization = fare_rev_2022.organization + AND service2022.fiscal_year = fare_rev_2022.fiscal_year ) select * FROM all_2022 UNION ALL -select * from data_2023 +select * from all_2023 diff --git a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py index b4265d74d6..c8e3227b2f 100644 --- a/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py +++ b/warehouse/models/intermediate/ntd_validation/int_ntd_rr20_service_ratios.py @@ -1,9 +1,5 @@ import logging -import pandas as pd # noqa: F401 -import pyspark # noqa: F401 -import pyspark.sql.functions as F # noqa: F401 - def write_to_log(logfilename): """ @@ -28,31 +24,6 @@ def write_to_log(logfilename): return logger -def make_ratio_cols(df, numerator, denominator, col_name, logger, operation="sum"): - if col_name is not None: - # If a user specify a column name, use it - # Raise error if the column already exists - if col_name in df.columns: - logger.info(f"Dataframe already has column '{col_name}'") - raise ValueError(f"Dataframe already has column '{col_name}'") - - else: - _col_name = col_name - - if operation == "sum": - df = df.groupby(["organization", "mode", "fiscal_year"]).apply( - lambda x: x.assign( - **{_col_name: lambda x: x[numerator].sum() / x[denominator]} - ) - ) - # else do not sum the numerator columns - else: - df = df.groupby(["organization", "mode", "fiscal_year"]).apply( - lambda x: x.assign(**{_col_name: lambda x: x[numerator] / x[denominator]}) - ) - return df - - def model(dbt, session): # Set up the logger object logger = write_to_log("rr20_servicechecks_log.log") @@ -60,27 +31,61 @@ def model(dbt, session): # Load data from BigQuery - 
pass in the dbt model that we draw from. allyears = dbt.ref("int_ntd_rr20_service_alldata") allyears = allyears.toPandas() + logger.info("Service data loaded!") # Calculate needed ratios, added as new columns numeric_columns = allyears.select_dtypes(include=["number"]).columns - allyears[numeric_columns] = allyears[numeric_columns].fillna(0) + allyears[numeric_columns] = allyears[numeric_columns].fillna( + value=0, inplace=False, axis=1 + ) - allyears = make_ratio_cols( - allyears, "Total_Annual_Expenses_By_Mode", "Annual_VRH", "cost_per_hr", logger + # Cost per hr + allyears2 = ( + allyears.groupby(["organization", "mode", "fiscal_year"], dropna=False) + .apply( + lambda x: x.assign( + cost_per_hr=x["Total_Annual_Expenses_By_Mode"] / x["Annual_VRH"] + ) + ) + .reset_index(drop=True) ) - allyears = make_ratio_cols(allyears, "Annual_VRM", "VOMX", "miles_per_veh", logger) - allyears = make_ratio_cols( - allyears, - "Total_Annual_Expenses_By_Mode", - "Annual_UPT", - "fare_rev_per_trip", - logger, + # Miles per vehicle + allyears2 = ( + allyears2.groupby(["organization", "mode", "fiscal_year"], dropna=False) + .apply( + lambda x: x.assign( + miles_per_veh=lambda x: x["Annual_VRM"].sum() / x["VOMX"] + ) + ) + .reset_index(drop=True) ) - allyears = make_ratio_cols( - allyears, "Annual_VRM", "Annual_VRH", "rev_speed", logger, operation="mean" + # Fare revenues + allyears2 = ( + allyears2.groupby(["organization", "mode", "fiscal_year"], dropna=False) + .apply( + lambda x: x.assign( + fare_rev_per_trip=lambda x: x["Fare_Revenues"].sum() / x["Annual_UPT"] + ) + ) + .reset_index(drop=True) ) - allyears = make_ratio_cols( - allyears, "Annual_UPT", "Annual_VRH", "trips_per_hr", logger, operation="mean" + # Revenue Speed + allyears2 = ( + allyears2.groupby(["organization", "mode", "fiscal_year"], dropna=False) + .apply( + lambda x: x.assign(rev_speed=lambda x: x["Annual_VRM"] / x["Annual_VRH"]) + ) + .reset_index(drop=True) ) + # Trips per hr + allyears2 = ( + 
allyears2.groupby(["organization", "mode", "fiscal_year"], dropna=False) + .apply( + lambda x: x.assign(trips_per_hr=lambda x: x["Annual_UPT"] / x["Annual_VRH"]) + ) + .reset_index(drop=True) + ) + + logger.info("Ratios calculated!") return allyears diff --git a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py index 3a92a04e4c..adc4edf1c6 100644 --- a/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py +++ b/warehouse/models/mart/ntd_validation/fct_ntd_rr20_service_checks.py @@ -273,5 +273,5 @@ def model(dbt, session): logger.info(f"RR-20 service data checks conducted on {this_date} is complete!") - # Part 2: send table to BigQuery + # Send table to BigQuery return rr20_checks diff --git a/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml b/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml index 346169b267..6d7ef7d398 100644 --- a/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml +++ b/warehouse/models/staging/ntd_validation/_src_api_externaltable.yml @@ -8,4 +8,4 @@ sources: database: "{{ env_var('DBT_SOURCE_DATABASE', var('SOURCE_DATABASE')) }}" schema: external_blackcat tables: - - name: all_2023_ntdreports + - name: all_ntdreports diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql index 7f84f3af60..f6e2c33054 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a10.sql @@ -14,5 +14,5 @@ SELECT a10.DOLeasedByPublicAgency as do_leased_by_public_agency, a10.DOLeasedFromPrivateEntity as do_leased_from_private_entity, a10.LastModifiedDate as last_modified_date -FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }}, - UNNEST(`ntdreportingstationsandmaintenance_data`) as `a10` +FROM {{ source('ntd_report_validation', 'all_ntdreports') }}, + 
UNNEST(`ntdreportingstationsandmaintenance_data`) as `a10` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql index 5925b966e9..bf685f194a 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_a30_assetandresourceinfo.sql @@ -23,5 +23,5 @@ SELECT a30.ModesOperatedDisplayText as modes_operated_display_text, a30.ModesOperatedFullText as modes_operated_full_text, a30.LastModifiedDate as last_modified_date -FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }}, - UNNEST(`ntdassetandresourceinfo_data`) as `a30` +FROM {{ source('ntd_report_validation', 'all_ntdreports') }}, + UNNEST(`ntdassetandresourceinfo_data`) as `a30` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql index 1a49ff9b3a..f40457e2f0 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_rural.sql @@ -19,5 +19,5 @@ SELECT ntdreportingrr20_rural_data.SponsoredServiceUPT as sponsored_service_upt, ntdreportingrr20_rural_data.Quantity as quantity, ntdreportingrr20_rural_data.LastModifiedDate as last_modified_date -FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }}, - UNNEST(`ntdreportingrr20_rural_data`) as `ntdreportingrr20_rural_data` +FROM {{ source('ntd_report_validation', 'all_ntdreports') }}, + UNNEST(`ntdreportingrr20_rural_data`) as `ntdreportingrr20_rural_data` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql index 224860c383..02b61c3728 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql +++ 
b/warehouse/models/staging/ntd_validation/stg_ntd_2023_rr20_urban_tribal.sql @@ -11,6 +11,6 @@ SELECT ntdreportingrr20_urban_tribal_data.CapitalExpended as capital_expended, ntdreportingrr20_urban_tribal_data.Description as description, ntdreportingrr20_urban_tribal_data.LastModifiedDate as last_modified_date -FROM {{ source('ntd_report_validation', 'all_2023_ntdreports') }}, +FROM {{ source('ntd_report_validation', 'all_ntdreports') }}, -- `cal-itp-data-infra-staging.external_blackcat.all_2023_ntdreports` - UNNEST(`ntdreportingrr20_urban_tribal_data`) as `ntdreportingrr20_urban_tribal_data` + UNNEST(`ntdreportingrr20_urban_tribal_data`) as `ntdreportingrr20_urban_tribal_data` diff --git a/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql b/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql index adde256f4d..c0282b37b4 100644 --- a/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql +++ b/warehouse/models/staging/ntd_validation/stg_ntd_subrecipients.sql @@ -1,3 +1,3 @@ SELECT Organization as organization -FROM blackcat_raw.2023_organizations +FROM `cal-itp-data-infra.blackcat_raw.2023_organizations`