Skip to content

Commit

Permalink
Merge pull request #86 from datakind/test_type_expression_is_true
Browse files Browse the repository at this point in the history
test type associated columns not null
  • Loading branch information
JanPeterDatakind authored Jun 15, 2023
2 parents b5bc0f8 + bfc17d5 commit 6e5d741
Show file tree
Hide file tree
Showing 13 changed files with 720 additions and 614 deletions.
17 changes: 12 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -408,9 +408,9 @@ generated one.
7. `associated_columns_not_null`
<br><br>
```
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', 'd74fc600-31c3-307d-9501-5b7f6b09aff5', 'MISSING-1', 3, '',
'', '', 'ancview_pregnancy', 'associated_columns_not_null', 'diarrhea_dx', 'diarrhea diagnosis',
$${"name": "diarrhea_dx_has_duration", "col_value": True, "associated_columns": ['max_symptom_duration']}$$,
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', 'd74fc600-31c3-307d-9501-5b7f6b09aff5', 'MISSING-1', 3,
'diarrhea diagnosis', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'associated_columns_not_null', '', '',
$${"name": "diarrhea_dx_has_duration", "condition": "diarrhea_dx = True", "associated_columns": ["max_symptom_duration"]}$$,
'2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'your-name');
```
8. `expect_similar_means_across_reporters`
Expand Down Expand Up @@ -883,6 +883,7 @@ need to do is ..
1. `exec -it dot /bin/bash`
2. `pytest dot/self_tests/unit`
3. `pytest dot/self_tests/integration`
##### On your local machine
Expand Down Expand Up @@ -922,11 +923,17 @@ ScanProjec1_db:
And finally you can run the tests from a terminal as follows:
```
pytest dot/self_tests/unit
pytest dot/self_tests/integration
```
#### Guidelines for adding new tests
- Existing tests are at [the self-tests folder](dot/self_tests/unit)
- All tests extend the [test base class](dot/self_tests/unit/base_self_test_class.py) that
- Existing unit tests are at [the self-tests folder](dot/self_tests/unit)
- When a function needs to be modified, ideally it will have a passing test beforehand; if not, please consider adding one
- One integration test at [the integration self-tests folder](dot/self_tests/integration) that
- instead of running unit tests for individual functions, runs the full dot pipeline on the fake data and checks the results
- it runs all the tests configured in [sample_dot_data.sql](db/dot/4-upload_sample_dot_data.sql)
- whenever a new test type is designed for dot, consider adding a line to the SQL above so that it gets tested
- All tests (both unit & integration) extend the [test base class](dot/self_tests/unit/base_self_test_class.py) that
- facilitates the import of modules under test
- recreates a directory in the file system for the test outputs
- provides a number of functions for supporting tests that access the database, mocking the config files to point to the
Expand Down
2 changes: 1 addition & 1 deletion db/dot/1-schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ CREATE TABLE IF NOT EXISTS dot.test_parameters_interface(
parameter VARCHAR(300) NOT NULL,
parameter_type VARCHAR(300) CHECK(parameter_type IN ('entity any field', 'entity id field', 'entity columns boolean logic',
'view/table', 'entity date field', 'one of (hour, day, week)',
'entity numeric field','sql statement','list of values')),
'entity numeric field', 'sql statement', 'list of values')),
example VARCHAR(300) NOT NULL,
description VARCHAR(1000) NOT NULL,
UNIQUE (test_type, parameter),
Expand Down
2 changes: 2 additions & 0 deletions db/dot/2-upload_static_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ INSERT INTO dot.test_parameters_interface VALUES('expect_similar_means_across_re
INSERT INTO dot.test_parameters_interface VALUES('expect_similar_means_across_reporters', 'quantity', 'entity numeric field', 'temperature', 'The name of the numeric field to analyze for variation');
INSERT INTO dot.test_parameters_interface VALUES('expect_similar_means_across_reporters', 'data_table', 'view/table', 'dot_model__iccmview_assessment', 'The name of entity view where data is');
INSERT INTO dot.test_parameters_interface VALUES('expect_similar_means_across_reporters', 'id_column', 'entity id field', 'reported_by', 'The id column to use to get failed test records');
-- Parameters of the associated_columns_not_null test type: a boolean "condition" selecting
-- the rows to check, and the "associated_columns" list of columns that should not be null.
INSERT INTO dot.test_parameters_interface VALUES('associated_columns_not_null', 'condition', 'entity columns boolean logic', $$stops = \"non-stop\"$$, 'Where clause of rows that are going to be checked');
INSERT INTO dot.test_parameters_interface VALUES('associated_columns_not_null', 'associated_columns', 'list of values', $$["price", "origin_iata", "destination_iata"]$$, 'List of column names that should not be null');

-- dot.scenario_test_types
INSERT INTO dot.scenario_test_types VALUES('MISSING-1', 'associated_columns_not_null');
Expand Down
8 changes: 6 additions & 2 deletions db/dot/4-upload_sample_dot_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '3081f033-e8f4-4f3
'', '', $${"name": "t_direct_flights_positive_price", "expression": "price is not null and price > 0",
"condition": "stops = 'non-stop'"}$$, '2022-12-10 19:00:00.000 -0500', '2022-12-10 19:00:00.000 -0500', 'Lorenzo');

COMMIT;

-- Sample associated_columns_not_null test (scenario MISSING-1) against 'all_flight_data':
-- rows matching the "condition" (non-stop flights) are checked for nulls in the listed
-- "associated_columns" (price, origin_iata, destination_iata).
-- NOTE(review): VALUES are positional; confirm the order against the dot.configured_tests definition.
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', 'd74fc600-31c3-307d-9501-5b7f6b09aff5', 'MISSING-1',
3, 'Direct flights have price, origin & destination', '', '', 'all_flight_data',
'associated_columns_not_null', '', '', $${"name": "t_direct_flights_hava_data", "condition": "stops = 'non-stop'",
"associated_columns": ["price", "origin_iata", "destination_iata"]}$$, '2022-12-11 19:00:00.000 -0500',
'2022-12-11 19:00:00.000 -0500', 'Lorenzo');

COMMIT;
4 changes: 2 additions & 2 deletions dot/dbt/macros/test_associated_columns_not_null.sql
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
-- test the number of rows that are null in associated columns.
-- For instance if fever = yes then one of the associated
-- column is fever_duration.
{% macro test_associated_columns_not_null(model, column_name, col_value, associated_columns, name, table_specific_uuid='uuid') %}
{% macro test_associated_columns_not_null(model, associated_columns, name, condition='1=1', table_specific_uuid='uuid') %}

select
array_agg({{ table_specific_uuid }}) as uuid_list -- postgres only?
from
{{model}}
where
{{column_name}} = {{col_value}}
{{condition}}
and (
{% for col in associated_columns %}
{{col}} is null
Expand Down
11 changes: 11 additions & 0 deletions dot/self_tests/data/expected/integration/configured_tests_json.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
,test_id,test_params_all_json
0,9097669f-b1e0-394b-b628-930d36cf2890,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
1,368b65ac-b5e4-37a9-902d-5f385f94a9a0,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
2,a87d911d-28cb-3453-a777-b62cec58c7ba,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
3,59b7fabd-acb8-3a38-8cbf-91736a214cab,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
4,8abccc35-874f-3e54-98eb-6eed5c00cf72,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
5,2ba7f3e8-cd62-37ac-854f-01f704489130,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
6,cad13f73-27b5-3427-be8f-4d213bba3b19,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
7,ed27037a-4054-3070-9d88-fdf9cd0231c8,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
8,df44c2f4-65f8-3170-a03f-6035aaa45660,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
9,942f4496-1202-3768-9cfe-96128bcd754c,"{'key': 'reported_by', 'field': 'uuid', 'query': 'SELECT field1, field2, \'table1\' as ""primary_table"", \'field1\' as ""primary_table_id_field"" WHERE COLOR=\'green\'', 'values': ['dog', 'cat', 'ostrich'], 'quantity': 'temperature', 'condition': 'stops = ""non-stop""', 'id_column': 'reported_by', 'reference': ""ref('dot_model__ancview_pregnancy')"", 'data_table': 'dot_model__iccmview_assessment', 'expression': 'malaria_act_dosage is not null', 'associated_columns': ['price', 'origin_iata', 'destination_iata'], 'table_specific_uuid': 'uuid', 'table_specific_period': 'day', 'table_specific_patient_uuid': 'patient_id', 'table_specific_reported_date': 'reported'}"
Loading

0 comments on commit 6e5d741

Please sign in to comment.