From 5b60fd4a5daab8f646f4365672a61b8185e5d6ce Mon Sep 17 00:00:00 2001 From: ruaridhg Date: Thu, 14 Dec 2023 15:34:37 +0000 Subject: [PATCH 01/28] Add -e to pip install command in cli test --- cli/tests/run-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/tests/run-tests.sh b/cli/tests/run-tests.sh index 9acee9c43..0fded67f9 100755 --- a/cli/tests/run-tests.sh +++ b/cli/tests/run-tests.sh @@ -24,7 +24,7 @@ THIS_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) PACKAGE_DIR="${THIS_DIR%/*}" cd "$PACKAGE_DIR" || exit -pip install "../pixl_core/[test]" ".[test]" +pip install -e "../pixl_core/[test]" ".[test]" cd tests/ docker compose up -d From 68cc3bb975be724568a1dda7a54e07d6513c228a Mon Sep 17 00:00:00 2001 From: ruaridhg Date: Thu, 14 Dec 2023 15:46:23 +0000 Subject: [PATCH 02/28] Added flag for csv=True and ran tests for cli --- cli/src/pixl_cli/main.py | 24 +++++++++++++++++++++--- cli/tests/test_queue_start_and_stop.py | 8 ++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 5a88c8ff8..476ccd91b 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -73,8 +73,14 @@ def cli(*, debug: bool) -> None: default=True, help="Restart from a saved state. 
Otherwise will use the given .csv file", ) -def populate(csv_filename: str, queues: str, *, restart: bool) -> None: - """Populate a (set of) queue(s) from a csv file""" +@click.option( + "--csv_file", + show_default=True, + default=True, + help="The input is a csv file rather than a parquet dir", +) +def populate(csv_filename: str, queues: str, *, restart: bool, csv_file: bool) -> None: + """Populate a (set of) queue(s) from a csv file or a parquet directory""" logger.info(f"Populating queue(s) {queues} from {csv_filename}") for queue in queues.split(","): @@ -84,7 +90,9 @@ def populate(csv_filename: str, queues: str, *, restart: bool) -> None: logger.info(f"Extracting messages from state: {state_filepath}") inform_user_that_queue_will_be_populated_from(state_filepath) messages = Messages.from_state_file(state_filepath) - else: + elif csv_file is True: # noqa: SIM114 + messages = messages_from_csv(Path(csv_filename)) + elif csv_file is False: messages = messages_from_csv(Path(csv_filename)) remove_file_if_it_exists(state_filepath) # will be stale @@ -360,6 +368,16 @@ def messages_from_csv(filepath: Path) -> Messages: return messages +# def messages_from_parquet(dirpath: Path) -> Messages: +# """ +# Reads patient information from parquet files within directory structure +# and transforms that into messages. 
+# :param filepath: Path for parquet directory containing private and public +# files +# """ +# return messages # noqa: ERA001 + + def queue_is_up() -> Any: """Checks if the queue is up""" with PixlProducer(queue_name="") as producer: diff --git a/cli/tests/test_queue_start_and_stop.py b/cli/tests/test_queue_start_and_stop.py index 5e1103c82..c9da600ee 100644 --- a/cli/tests/test_queue_start_and_stop.py +++ b/cli/tests/test_queue_start_and_stop.py @@ -22,7 +22,9 @@ def test_populate_queue(queue_name: str = "test_populate") -> None: """Checks that patient queue can be populated without error.""" runner = CliRunner() - result = runner.invoke(populate, args=["test.csv", "--queues", queue_name]) + result = runner.invoke( + populate, args=["test.csv", "--queues", queue_name, "--csv_file", True] + ) assert result.exit_code == 0 @@ -32,7 +34,9 @@ def test_down_queue(queue_name: str = "test_down") -> None: the queue has been emptied. """ runner = CliRunner() - _ = runner.invoke(populate, args=["test.csv", "--queues", queue_name]) + _ = runner.invoke( + populate, args=["test.csv", "--queues", queue_name, "--csv_file", True] + ) _ = runner.invoke(stop, args=["--queues", queue_name]) state_path = Path(f"{queue_name}.state") From 91b5dfefabedba3d649e03696b39cc17e84debeb Mon Sep 17 00:00:00 2001 From: ruaridhg Date: Fri, 15 Dec 2023 09:03:18 +0000 Subject: [PATCH 03/28] Added test for parquet directory and messages_from_parquet func --- cli/src/pixl_cli/main.py | 74 ++++++++++++++++--- .../test_queue_start_and_stop_parquet.py | 49 ++++++++++++ 2 files changed, 113 insertions(+), 10 deletions(-) create mode 100644 cli/tests/test_queue_start_and_stop_parquet.py diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 476ccd91b..1901f9ce4 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -90,10 +90,10 @@ def populate(csv_filename: str, queues: str, *, restart: bool, csv_file: bool) - logger.info(f"Extracting messages from state: 
{state_filepath}") inform_user_that_queue_will_be_populated_from(state_filepath) messages = Messages.from_state_file(state_filepath) - elif csv_file is True: # noqa: SIM114 + elif csv_file is True: messages = messages_from_csv(Path(csv_filename)) elif csv_file is False: - messages = messages_from_csv(Path(csv_filename)) + messages = messages_from_parquet(Path(csv_filename)) remove_file_if_it_exists(state_filepath) # will be stale producer.publish(sorted(messages, key=study_date_from_serialised)) @@ -368,14 +368,68 @@ def messages_from_csv(filepath: Path) -> Messages: return messages -# def messages_from_parquet(dirpath: Path) -> Messages: -# """ -# Reads patient information from parquet files within directory structure -# and transforms that into messages. -# :param filepath: Path for parquet directory containing private and public -# files -# """ -# return messages # noqa: ERA001 +def messages_from_parquet(dirpath: Path) -> Messages: + """ + Reads patient information from parquet files within directory structure + and transforms that into messages. + :param filepath: Path for parquet directory containing private and public + files + """ + # TODO: Check if directory exists # noqa: TD003, FIX002 + public_dir = dirpath / "public" + private_dir = dirpath / "private" + + # MRN in people.PrimaryMrn: + people = pd.read_parquet(private_dir / "PERSON_LINKS.parquet") + # accession number in accessions.AccesionNumber + accessions = pd.read_parquet(private_dir / "PROCEDURE_OCCURRENCE_LINKS.parquet") + # study_date is in procedure.procdure_date + procedure = pd.read_parquet(public_dir / "PROCEDURE_OCCURRENCE.parquet") + # joining data together + people_procedures = people.join(procedure, on="person_id", lsuffix="_people") + joined = people_procedures.join( + accessions, on="procedure_occurrence_id", rsuffix="_links" + ) + + expected_col_names = [ + "PrimaryMrn", + "AccessionNumber", + "person_id", + "procedure_date", + ] + logger.debug( + f"Extracting messages from {dirpath}. 
Expecting columns to include " + f"{expected_col_names}" + ) + + # First line is column names + messages_df = joined + messages = Messages() + + for col in expected_col_names: + if col not in list(messages_df.columns): + msg = ( + f"csv file expected to have at least {expected_col_names} as " + f"column names" + ) + raise ValueError(msg) + + mrn_col_name, acc_num_col_name, _, dt_col_name = expected_col_names + for _, row in messages_df.iterrows(): + messages.append( + serialise( + mrn=row[mrn_col_name], + accession_number=row[acc_num_col_name], + study_datetime=row[dt_col_name], + ) + ) + + if len(messages) == 0: + msg = f"Failed to find any messages in {dirpath}" + raise ValueError(msg) + + logger.debug(f"Created {len(messages)} messages from {dirpath}") + return messages def queue_is_up() -> Any: diff --git a/cli/tests/test_queue_start_and_stop_parquet.py b/cli/tests/test_queue_start_and_stop_parquet.py new file mode 100644 index 000000000..85cd4c913 --- /dev/null +++ b/cli/tests/test_queue_start_and_stop_parquet.py @@ -0,0 +1,49 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Patient queue tests""" + +from pathlib import Path + +from click.testing import CliRunner +from pixl_cli.main import populate, queue_is_up, stop + + +def test_populate_queue_parquet(queue_name: str = "test_populate") -> None: + """Checks that patient queue can be populated without error.""" + runner = CliRunner() + result = runner.invoke( + populate, args=["~/resources", "--queues", queue_name, "--csv_file", False] + ) + assert result.exit_code == 0 + + +def test_down_queue_parquet(queue_name: str = "test_down") -> None: + """ + Checks that after the queue has been sent a stop signal, + the queue has been emptied. + """ + runner = CliRunner() + _ = runner.invoke( + populate, args=["~/resources", "--queues", queue_name, "--csv_file", False] + ) + _ = runner.invoke(stop, args=["--queues", queue_name]) + + state_path = Path(f"{queue_name}.state") + assert state_path.exists() + Path.unlink(state_path) + + +def test_queue_is_up() -> None: + """Checks whether status of queue can be asserted correctly.""" + assert queue_is_up() From 20988f7e0aa874a13d5c47fcb67f91ddb3f2c06e Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Fri, 15 Dec 2023 18:42:14 +0000 Subject: [PATCH 04/28] Use resources fixture to find test parquest files. Make input options clearer. Tests will still fail due to missing private parquet files. 
--- cli/src/pixl_cli/main.py | 53 +++++++++++-------- cli/tests/test_queue_start_and_stop.py | 6 +-- .../test_queue_start_and_stop_parquet.py | 10 ++-- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 1901f9ce4..0be7cb635 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -59,30 +59,36 @@ def cli(*, debug: bool) -> None: @cli.command() -@click.argument("csv_filename", type=click.Path(exists=True)) @click.option( "--queues", default="ehr,pacs", show_default=True, help="Comma seperated list of queues to populate with messages generated from the " - ".csv file", + "input file(s)", ) @click.option( "--restart/--no-restart", show_default=True, default=True, - help="Restart from a saved state. Otherwise will use the given .csv file", + help="Restart from a saved state. Otherwise will use the given input file(s)", ) @click.option( - "--csv_file", - show_default=True, - default=True, - help="The input is a csv file rather than a parquet dir", + "--csv-file", + type=click.Path(path_type=Path, exists=True, dir_okay=False), + help="Give a csv file as input", ) -def populate(csv_filename: str, queues: str, *, restart: bool, csv_file: bool) -> None: +@click.option( + "--parquet-dir", + type=click.Path(path_type=Path, exists=True, file_okay=False), + help="Give a directory containing parquet input files", +) +def populate(queues: str, *, restart: bool, csv_file: Path, parquet_dir: Path) -> None: """Populate a (set of) queue(s) from a csv file or a parquet directory""" - logger.info(f"Populating queue(s) {queues} from {csv_filename}") - + if (csv_file is None) == (parquet_dir is None): + err_str = "must specify --parquet-dir or --csv-file, but not both" + raise ValueError(err_str) + inp_source = parquet_dir if parquet_dir is not None else csv_file + logger.info(f"Populating queue(s) {queues} from {inp_source}") for queue in queues.split(","): with PixlProducer(queue_name=queue, 
**config["rabbitmq"]) as producer: state_filepath = state_filepath_for_queue(queue) @@ -90,10 +96,10 @@ def populate(csv_filename: str, queues: str, *, restart: bool, csv_file: bool) - logger.info(f"Extracting messages from state: {state_filepath}") inform_user_that_queue_will_be_populated_from(state_filepath) messages = Messages.from_state_file(state_filepath) - elif csv_file is True: - messages = messages_from_csv(Path(csv_filename)) - elif csv_file is False: - messages = messages_from_parquet(Path(csv_filename)) + elif csv_file is not None: + messages = messages_from_csv(csv_file) + elif parquet_dir is not None: + messages = messages_from_parquet(parquet_dir) remove_file_if_it_exists(state_filepath) # will be stale producer.publish(sorted(messages, key=study_date_from_serialised)) @@ -368,16 +374,19 @@ def messages_from_csv(filepath: Path) -> Messages: return messages -def messages_from_parquet(dirpath: Path) -> Messages: +def messages_from_parquet(dir_path: Path) -> Messages: """ Reads patient information from parquet files within directory structure and transforms that into messages. - :param filepath: Path for parquet directory containing private and public + :param dir_path: Path for parquet directory containing private and public files """ - # TODO: Check if directory exists # noqa: TD003, FIX002 - public_dir = dirpath / "public" - private_dir = dirpath / "private" + public_dir = dir_path / "public" + private_dir = dir_path / "private" + for d in [public_dir, private_dir]: + if not d.is_dir(): + err_str = f"{d} must exist and be a directory" + raise ValueError(err_str) # MRN in people.PrimaryMrn: people = pd.read_parquet(private_dir / "PERSON_LINKS.parquet") @@ -398,7 +407,7 @@ def messages_from_parquet(dirpath: Path) -> Messages: "procedure_date", ] logger.debug( - f"Extracting messages from {dirpath}. Expecting columns to include " + f"Extracting messages from {dir_path}. 
Expecting columns to include " f"{expected_col_names}" ) @@ -425,10 +434,10 @@ def messages_from_parquet(dirpath: Path) -> Messages: ) if len(messages) == 0: - msg = f"Failed to find any messages in {dirpath}" + msg = f"Failed to find any messages in {dir_path}" raise ValueError(msg) - logger.debug(f"Created {len(messages)} messages from {dirpath}") + logger.debug(f"Created {len(messages)} messages from {dir_path}") return messages diff --git a/cli/tests/test_queue_start_and_stop.py b/cli/tests/test_queue_start_and_stop.py index c9da600ee..21736f5e6 100644 --- a/cli/tests/test_queue_start_and_stop.py +++ b/cli/tests/test_queue_start_and_stop.py @@ -23,7 +23,7 @@ def test_populate_queue(queue_name: str = "test_populate") -> None: """Checks that patient queue can be populated without error.""" runner = CliRunner() result = runner.invoke( - populate, args=["test.csv", "--queues", queue_name, "--csv_file", True] + populate, args=["--queues", queue_name, "--csv-file", "test.csv"] ) assert result.exit_code == 0 @@ -34,9 +34,7 @@ def test_down_queue(queue_name: str = "test_down") -> None: the queue has been emptied. 
""" runner = CliRunner() - _ = runner.invoke( - populate, args=["test.csv", "--queues", queue_name, "--csv_file", True] - ) + _ = runner.invoke(populate, args=["--queues", queue_name, "--csv-file", "test.csv"]) _ = runner.invoke(stop, args=["--queues", queue_name]) state_path = Path(f"{queue_name}.state") diff --git a/cli/tests/test_queue_start_and_stop_parquet.py b/cli/tests/test_queue_start_and_stop_parquet.py index 85cd4c913..42e377674 100644 --- a/cli/tests/test_queue_start_and_stop_parquet.py +++ b/cli/tests/test_queue_start_and_stop_parquet.py @@ -19,23 +19,25 @@ from pixl_cli.main import populate, queue_is_up, stop -def test_populate_queue_parquet(queue_name: str = "test_populate") -> None: +def test_populate_queue_parquet(resources, queue_name: str = "test_populate") -> None: """Checks that patient queue can be populated without error.""" + omop_parquet_dir = resources / "omop" runner = CliRunner() result = runner.invoke( - populate, args=["~/resources", "--queues", queue_name, "--csv_file", False] + populate, args=["--queues", queue_name, "--parquet-dir", omop_parquet_dir] ) assert result.exit_code == 0 -def test_down_queue_parquet(queue_name: str = "test_down") -> None: +def test_down_queue_parquet(resources, queue_name: str = "test_down") -> None: """ Checks that after the queue has been sent a stop signal, the queue has been emptied. 
""" + omop_parquet_dir = resources / "omop" runner = CliRunner() _ = runner.invoke( - populate, args=["~/resources", "--queues", queue_name, "--csv_file", False] + populate, args=["--queues", queue_name, "--parquet-dir", omop_parquet_dir] ) _ = runner.invoke(stop, args=["--queues", queue_name]) From e6bfb7d3c93d3739b34bc3992354778b358794df Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Mon, 18 Dec 2023 13:44:02 +0000 Subject: [PATCH 05/28] Add synthetic "private" OMOP data taken from the issue --- .../resources/omop/log/extract_summary.json | 94 ++++++++++++++++++ .../omop/private/CARE_SITE_BAD.parquet | Bin 0 -> 3305 bytes .../omop/private/CARE_SITE_LINKS.parquet | Bin 0 -> 1201 bytes .../omop/private/CDM_SOURCE_BAD.parquet | Bin 0 -> 7852 bytes .../private/CONDITION_OCCURRENCE_BAD.parquet | Bin 0 -> 5004 bytes .../CONDITION_OCCURRENCE_LINKS.parquet | Bin 0 -> 612 bytes .../omop/private/DEVICE_EXPOSURE_BAD.parquet | Bin 0 -> 4524 bytes .../private/DEVICE_EXPOSURE_LINKS.parquet | Bin 0 -> 682 bytes .../omop/private/DRUG_EXPOSURE_BAD.parquet | Bin 0 -> 3907 bytes .../omop/private/DRUG_EXPOSURE_LINKS.parquet | Bin 0 -> 597 bytes .../private/FACT_RELATIONSHIP_BAD.parquet | Bin 0 -> 1357 bytes .../omop/private/LOCATION_BAD.parquet | Bin 0 -> 1343 bytes .../omop/private/LOCATION_LINKS.parquet | Bin 0 -> 904 bytes .../omop/private/MEASUREMENT_BAD.parquet | Bin 0 -> 3982 bytes .../omop/private/MEASUREMENT_LINKS.parquet | Bin 0 -> 2309 bytes .../omop/private/OBSERVATION_BAD.parquet | Bin 0 -> 3618 bytes .../omop/private/OBSERVATION_LINKS.parquet | Bin 0 -> 1263 bytes .../private/OBSERVATION_PERIOD_BAD.parquet | Bin 0 -> 1488 bytes .../private/OBSERVATION_PERIOD_LINKS.parquet | Bin 0 -> 606 bytes .../resources/omop/private/PERSON_BAD.parquet | Bin 0 -> 3614 bytes .../omop/private/PERSON_LINKS.parquet | Bin 0 -> 1953 bytes .../private/PROCEDURE_OCCURRENCE_BAD.parquet | Bin 0 -> 3665 bytes .../PROCEDURE_OCCURRENCE_LINKS.parquet | Bin 0 -> 1311 bytes 
.../omop/private/SPECIMEN_BAD.parquet | Bin 0 -> 3326 bytes .../omop/private/SPECIMEN_LINKS.parquet | Bin 0 -> 928 bytes .../omop/private/VISIT_DETAIL_BAD.parquet | Bin 0 -> 3228 bytes .../omop/private/VISIT_DETAIL_LINKS.parquet | Bin 0 -> 435 bytes .../omop/private/VISIT_OCCURRENCE_BAD.parquet | Bin 0 -> 3429 bytes .../private/VISIT_OCCURRENCE_LINKS.parquet | Bin 0 -> 1349 bytes 29 files changed, 94 insertions(+) create mode 100644 cli/tests/resources/omop/log/extract_summary.json create mode 100644 cli/tests/resources/omop/private/CARE_SITE_BAD.parquet create mode 100644 cli/tests/resources/omop/private/CARE_SITE_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/CDM_SOURCE_BAD.parquet create mode 100644 cli/tests/resources/omop/private/CONDITION_OCCURRENCE_BAD.parquet create mode 100644 cli/tests/resources/omop/private/CONDITION_OCCURRENCE_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/DEVICE_EXPOSURE_BAD.parquet create mode 100644 cli/tests/resources/omop/private/DEVICE_EXPOSURE_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/DRUG_EXPOSURE_BAD.parquet create mode 100644 cli/tests/resources/omop/private/DRUG_EXPOSURE_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/FACT_RELATIONSHIP_BAD.parquet create mode 100644 cli/tests/resources/omop/private/LOCATION_BAD.parquet create mode 100644 cli/tests/resources/omop/private/LOCATION_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/MEASUREMENT_BAD.parquet create mode 100644 cli/tests/resources/omop/private/MEASUREMENT_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/OBSERVATION_BAD.parquet create mode 100644 cli/tests/resources/omop/private/OBSERVATION_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/OBSERVATION_PERIOD_BAD.parquet create mode 100644 cli/tests/resources/omop/private/OBSERVATION_PERIOD_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/PERSON_BAD.parquet create mode 100644 
cli/tests/resources/omop/private/PERSON_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/PROCEDURE_OCCURRENCE_BAD.parquet create mode 100644 cli/tests/resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/SPECIMEN_BAD.parquet create mode 100644 cli/tests/resources/omop/private/SPECIMEN_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/VISIT_DETAIL_BAD.parquet create mode 100644 cli/tests/resources/omop/private/VISIT_DETAIL_LINKS.parquet create mode 100644 cli/tests/resources/omop/private/VISIT_OCCURRENCE_BAD.parquet create mode 100644 cli/tests/resources/omop/private/VISIT_OCCURRENCE_LINKS.parquet diff --git a/cli/tests/resources/omop/log/extract_summary.json b/cli/tests/resources/omop/log/extract_summary.json new file mode 100644 index 000000000..ab66372fc --- /dev/null +++ b/cli/tests/resources/omop/log/extract_summary.json @@ -0,0 +1,94 @@ +{ + "gitsha":"56e0eba8d098523c99f3c899979096d2c5ed4c5f", + "filesummaries":[ +"CARE_SITE.parquet: 4084 bytes", +"CARE_SITE_BAD.parquet: 3305 bytes", +"CARE_SITE_LINKS.parquet: 1201 bytes", +"CDM_SOURCE.parquet: 5823 bytes", +"CDM_SOURCE_BAD.parquet: 7852 bytes", +"CONDITION_OCCURRENCE.parquet: 6770 bytes", +"CONDITION_OCCURRENCE_BAD.parquet: 5004 bytes", +"CONDITION_OCCURRENCE_LINKS.parquet: 612 bytes", +"DEVICE_EXPOSURE.parquet: 4524 bytes", +"DEVICE_EXPOSURE_BAD.parquet: 4524 bytes", +"DEVICE_EXPOSURE_LINKS.parquet: 682 bytes", +"DRUG_EXPOSURE.parquet: 5782 bytes", +"DRUG_EXPOSURE_BAD.parquet: 3907 bytes", +"DRUG_EXPOSURE_LINKS.parquet: 597 bytes", +"FACT_RELATIONSHIP.parquet: 2167 bytes", +"FACT_RELATIONSHIP_BAD.parquet: 1357 bytes", +"LOCATION.parquet: 1865 bytes", +"LOCATION_BAD.parquet: 1343 bytes", +"LOCATION_LINKS.parquet: 904 bytes", +"MEASUREMENT.parquet: 6742 bytes", +"MEASUREMENT_BAD.parquet: 3982 bytes", +"MEASUREMENT_LINKS.parquet: 2309 bytes", +"OBSERVATION.parquet: 5614 bytes", +"OBSERVATION_BAD.parquet: 3618 bytes", 
+"OBSERVATION_LINKS.parquet: 1263 bytes", +"OBSERVATION_PERIOD.parquet: 2183 bytes", +"OBSERVATION_PERIOD_BAD.parquet: 1488 bytes", +"OBSERVATION_PERIOD_LINKS.parquet: 606 bytes", +"PERSON.parquet: 5420 bytes", +"PERSON_BAD.parquet: 3614 bytes", +"PERSON_LINKS.parquet: 1953 bytes", +"PROCEDURE_OCCURRENCE.parquet: 5230 bytes", +"PROCEDURE_OCCURRENCE_BAD.parquet: 3665 bytes", +"PROCEDURE_OCCURRENCE_LINKS.parquet: 1311 bytes", +"SPECIMEN.parquet: 4873 bytes", +"SPECIMEN_BAD.parquet: 3326 bytes", +"SPECIMEN_LINKS.parquet: 928 bytes", +"VISIT_DETAIL.parquet: 3228 bytes", +"VISIT_DETAIL_BAD.parquet: 3228 bytes", +"VISIT_DETAIL_LINKS.parquet: 435 bytes", +"VISIT_OCCURRENCE.parquet: 5259 bytes", +"VISIT_OCCURRENCE_BAD.parquet: 3429 bytes", +"VISIT_OCCURRENCE_LINKS.parquet: 1349 bytes" + ], + "datetime":"2023-12-07 14:08:58", + "user":"John Watts", + "settings":{ + "site":"UCLH", + "cdm_source_name":"Test Extract - UCLH OMOP CDM", + "cdm_source_abbreviation":"Test UCLH OMOP", + "project_logic":"mock_project_settings/project_logic.R", + "min_date": 20100101, + "max_date": 20241231, + "enabled_sources":"epic", + "output_format":"parquet", + "OMOP_version": 60, + "cohort":{ + "file":"settings/mock_project_settings/mock_cohort.csv", + "exclude_NDOO": true, + "exclude_confidential": true, + "min_age_at_encounter_start": 16, + "max_age_at_encounter_start": 80 + }, + "keep_source_vals": false, + "person":{ + "include_nhs_number": false, + "include_mrn": false, + "keep_day_of_birth": false, + "keep_month_of_birth": true, + "include_gp_as_primary_care_site": false + }, + "observation_period_strategy":"visit_span", + "local_timezone":"Europe/London", + "output_timezone":"GMT", + "condition_occurrence":{ + "include_sexual_health": false, + "allow_icd_as_std": true + }, + "measurements":{ + "include_file":null, + "include_measurement_concept_ids":null, + "non_generic_numeric_labs": 3040104 + }, + "location":{ + "keep_only_zip": true, + "replace_postcode_with_LSOA": true + }, + 
"mapping_effective_date": 19698, + "name":"mock_project_settings" + } +} \ No newline at end of file diff --git a/cli/tests/resources/omop/private/CARE_SITE_BAD.parquet b/cli/tests/resources/omop/private/CARE_SITE_BAD.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1028dcef5fcb22e5e4c7f8f1c01c07e240b9d4dc GIT binary patch literal 3305 zcmd5<-EQJW6n5BTRe4b_s)9getK0;sQnRs5OhWGl2M9~-4T%4n$TkFUZ0zMH7}U!? z$ZAzpeSlu{va0$FeV(2(#$bqoHchpv1>^bo&Ud~!XXXg5NrjK>N0>o>uNNypuS{E4Cj)!9Dd3kE zJIw#wUkQk=1&rb z?I^UFByVb0;F(AmjJ9HV#9SpXm&g|kk_Ucd-@Xy@?cUH58D-*$xcS8Mm3-!bNpP#cLTdU@e z;ho}HfwmmWb3&ShVOrqD6d>?cjafd;a;zYpQc#-q{E=tKdk|_Z@^tio1TF9z1PQ(; zo@3R?WHivG6Y#fmf5z&zIkko`v9w)F{Tv=YueV@aAjVz>c!6D4m_f&%7S7lQNu~0x@q<^&VjsdM_=qG&2r0uC zpAsRbB-lAx3QF~^Bvy^S)wO3NAvMU+I<(mM8%$1&vbAoK-c(CUs zOYCcn`a&{tf@A>3sByrtkjplVi@ZX>N431g1-`4Tl1yw@z9<5&<^tY*oO{i))WFNt zB}b3$!3C|TuheQ*INdS_oo2<31$GbG7l7?-Ve9 ztDL4JOS#uB>l1D0^KJWv+^u})=78_(R{d5NOs-weIBm?iQS@?R=q0`2_FF>zRukHQ zsn0u>X#fuLAsX#wmTL;;UBJJq5XWdW-zG6LPN=GJvT#z;UZK$^kva zZ9NMuRiC5(j+GyG8nqx+l!n=HtD%@pA!C6*Qy;b+#OgwZ*DaV&wfhhYb9rX|yO?@+ zyny$yig)uGFR^rv<32Vy3cwmcQrBlECwRTfHG0!!oLrR-LN0Ok^3(Q#mhEe(Kkfn= zbeGFhCtlH^UBr-V3!9BRY{vxj>R8I^$Vc@RslH1}K)bXj;ab7HZwYx$ZDgjHZ<4!M z`X^ahq>9FJ?CYWSYdugOxAl1E0%mD(L%ehjBqs>;f=I^D$94bFu=6SBkzDzDmimAM ziZe)Uk?f4n6o5E?lfFM2`2+qa4)#5V^S)4_Ceghk2&b!E@){Ors(lb@PsZ15OQ@>G>G4jF)0N&^56 zJj3->w5551CoCgI)w7%MYA|F3y?^gM#^ zP5-3XIXn5Ugyt#|0cQk}#tmtf2!GAz^Xr6UHDV-*Fw&L?f2>4=4{Bq_l?Q{S^e#rO z+>`GXFfW0aGz(QDL$oU?igxX0QT>AL~>8``=0du5TT{_ z?5}bAbKJs9Hx)9g1uzo6dn`{Kp1`V)*xv&WOxHUM>i(P$iWwt|5g-`NvdGf@;m)bXXYCj=UXVn>LfC=HdZhygF?R1#vB zcGSZiHFA}dzeY}#s#K@cs1(JqGP)$G(AB~^7};r@xTo#+Zf-i3NHC}=9`uw|<+q_; z-&K4SbvZ=EC_3&G{lm`wxRvjP%sc>gM2S%{a^7xwp@`ovX_pvoom zV=Jn(I+7a16<-~8E936w{-pn`J#ab!1I`2c#(cA29xA-4q!M6cCyw{#Tu#^A$z;v( hyt7&MLVEt_>~(fyb3M17Gq){(;DMj`qp9Ja)gSU93#$MC literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/CDM_SOURCE_BAD.parquet 
b/cli/tests/resources/omop/private/CDM_SOURCE_BAD.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2478ac1ff00fe3b5c80757d96ec827b51e485dbe GIT binary patch literal 7852 zcmdT}O>83f6(2^#>m}^Q>)m0oiYgAfqm>db%&=gSQwD4>;K4TK`(iP}7>xNU3Zu`zvn#=4-UQ29g(4! zb;k7p>|bM6m=#Zmdf~Zhzx&En5O&{7!=1L-Xo~%U_At(kAhoi#;eJo9wvPLqK~GkW z+m)8W&@S5b8SVOV`1XpQHLiqPnre zJu{wB@7s4w^`2VPt4LB$IX|tar=2!Y^Nr2*XB+#io&BvH6`kRFNy=Ph)|e-{GOHdt zAVHLKRIuZ`o7`vMzF9TlIuLHX)2u2zg76cY>(@5+ANSbT_Yk3#$y_w6j8jKrcYePQ z=AMZow;g9EcNZ`;M>DAY>F_YL79)?f_~jxLZH( z$Q5bOtn|h@Dgq_zDzo1`V!wXm3DMNs3jFkmMW7Z^Ri!WYPP;}*qQI{ov#%fDhQRL_ zQ{c~afgO3!Qrar10uU)DN>2gn8`{2R*xxVipm_)p^&FGsIu2>tpsy5Bsm5 zp&B~Vm3(Cls0Sl3q%Tr|7t`uBq;6dV-Xwx0VB z?+T89gAehcVA#Qhx3;&p9YIGf8{33ufOGKV?sss0KIDL5i06U<*nwXmeFqnE_#J%k z71_jb777m6jQw1Y6k@ITA?<`=5NNFXbiT+Ee?+RbiPwd5en&-Bd#BPs1qd3VEr;A` z4q9!P(BMpvPxI#kRvgw(jVVzE?q%ZvRj^%7z9p6mK{x4D=Ha#-myejshh@guw6_9& zhu$vdp9hx?tZQ56FQ{5=HQ^;fv3Lj3+ z%I+_o`taHk5`rKc!4I#LAiNSHH6bPlH6dCfbCJl@`Vc2~jrD^g*o$Mh2p6&rJck0% z!ILmS8Nv?U$kxah-}<#240luFurp3K5_~#)mJ+jPfrFjet7s#a%I7oj{cJ6j&WfXG zBcVM7*dvV7wGPhlVq=#BdM?@u)}=!JBHGyHqYc>8$oYZCg?Q8%!H*L!Ku%42!rTn~ zD1Fpzl;hlq%!fI-m2MV!iVjz89 zEhZb~e7Ig}CCBArxmzk`x)T5Dr;R@=Oz`vdb19x_%HUIRCjmCgq0eT0Wbtu=OADIK zC$V7qr)K&G;v@xj@XM{{X|)LaCvi6M-LTt`_z@=+g5VbmKN2@&d~M7e2wh&{({=D= zzf?>Hi-B@eZl~|u?v~ge>p3uqMTj9I7R6z-ZN$wP=mECK532E)zfuSfih*Ra+{%y3 zg z5c1#Xkt0#))~kD&aRv0N0&dG%0Qj9tdwErAV^4zK32i%ytvp_f!d=ETzJ0_@?;P?RhJJ~5n@Fwh#LjXAsaEAe(4|yEr$mAZb8%GVD|Fj;0xD)pIwgun0&j*_$ zkqjX^!#fDpFEYEpPOxXk?>q7N(uZ49@TppdscguZvv8Yl>aUm*KF;)8=rj3^m3 z^(9Orc+`n3iVv`71L)uHT%+GFkk_JE1k1@tA29Sw5D>Ha#ZbTaw0(;__!-VO5U(ed z0+QA6Tn3V#M_^(FzC&?mtC2vIk34e&wQL|LUb5jO#9nN)djQ}H| z7a}qqHDM|eoj9?N>lS}zalAO^akYf_;`r3Bwt;U!e3|-ppx2+3i=bwrflv}U9xc8) z$kT$Sh~sO{R>BbFj|4m{1LKkq84AP&U>Ds;i{kfi4u4WlkrZ$c(^774x6=FNKv6g4 mZg;cN>ve`+?X%6x1z!4J`ZwOdKWtz9M?HH=QHSvV)c*n10o-u_ literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/CONDITION_OCCURRENCE_BAD.parquet 
b/cli/tests/resources/omop/private/CONDITION_OCCURRENCE_BAD.parquet new file mode 100644 index 0000000000000000000000000000000000000000..29b36aa4189097666daf75cb726ad5f9d42a68a9 GIT binary patch literal 5004 zcmbtYJ98RW6kg*;?6rzmF$zqWj2rDtCbrpKge=ls7K9KFNrZQ!fdwJZ@(6EKyG)TH zRjN$JbqbG*)GkuF@T5+e$z<{?a?afccUKE`q#dz)ALo4cJI_TNlbqzeoxqE)!=W4&#Xph_d-f3S|C&OX0r#4U94X;|c$(#R=E#X~K1z zm~fM>CETb@2{-IZ!u1-FaQ)6BT(i{(*Y7RD4V%fJmxt6Q{Szj@6BDp zhj@8c-VH~1De_`(Z;uc2h5Y_Ac!nfirgxE-M0uBo>8>n=Lm<(sb(ExHzs+V*F-7`OJ1T8ci0J4K=UNkV1>^x#QB;7Ldf zZ>UP53mlDcJ04GEX8lFBla#ahi&Q3m5ju*sc9c#bRV?Nb>3l1d&1dFHCuuwh$dE<0 z)yFkC(}_!3~e9AOgl^=A=K*v;aSUVyq>lEcv*({aoOgucR^r z@M1|gqcBJ8)jXJUbiPt1Z(tywPI3dMONc zt-;GD!M4gGlP{p$rP9q#>)qU-kDM;>CMoIOK3DspiKoa{D2 zZrN=tPO2`vGc7ysYAJkCmUHKgL=k)=u{O?+P6l$VoE^Yf>-@fOVybdPQZYWE>UsU3 z$YL(^JYf80%3kpW=6rkxv4L8NLw#T^u(=V3SbIG{Q>lm&KKIT}jNrLshhh-hcT0WdXnMVs)`w5(9P+rNPQXX%5QxJn^!5~bc!5fSNS>f3p%G5 zoKpXR???mJ1+L((ub&ZDCQ z?j>wBxuV9R7Y1KKzC~5V?94u8il;iZZBXZqWjZz#X!JlKZ!E8th~XzhW;^%1qVyM z^i+m#88>b5lp~!=sJKwOMeN1+4l(@QBGH-UZzEq&n-I?TWKbJ^H))Qase{3@+HlyP iJ(ja}M)n-tNx4$!rIM2G@Ck7cV&DPzKP88US$MWo}l8D{+Gxwxvrp_a^N8 z$cWx>7|pM+E+C)bt+0evlI0jha`pasTXP(DP=4+x>K)`mJSus+)_FkCX^i1P{S- o(lC2k#mj1xrqwXZlC|?3XA9c1)7Y!kYjt7Uw5kvKG9CI~UyLPtZxl>JR8LtE$SPi>iyNscXme&YcGXwx=>4+vEE>-#zy`-#Hh% zNXjdbixDm|SAZ9Ho8vg1k45H|mbrzc#jv7UZCP!|{hq82j7D3D%tg6qEP9XU_{$&0 zY)i12hWPe^hw;9Hq3#d*LsM59LqpPyh9ViN5A2Hz0cbzW!A`bE_fAJM+Fd{5H**1q zf6ROSElwkLIBctmtv8S z&D$LZY1lV)2&g4#rk|`YF9pDSf7x@$CCh<1Q5Rna3FFNb4=Y@hjJDb{HcVY=YU)$< zz^8{-R{|V*d({I8WqWep>=~*)Y5)3ako`4}m+(L{TkT$Bp!Y4cCxOg;PQ1Pr0PA}qal%rc7%9Vkt5Bt5z8u8{S0M_MFu#N+ zV3v+Drl^Df%*RwmgT#J&%?2gU^Nsw+|JDe zudKOc!w(u*4~{{elI>h11pA$oz{^}AHto-`%yif}mTdO;ME=-29Zv~&AN0eob^rJ= z5xbK}mZwg8(-Q<|PmDEW_fL`q7v+{>OGn{Zt||U}_Y!~pi-2FWudc`O8MShRkP7^GPo_maA*4=di98{sZ;zKsgtFGogLr&Bi_L5a zURy0}ujAYbnr)E{XpZ2EJVx^Z$q+*Oq(#V6>^~d9(oBVbZGnJk5kWHR@FFkp#Xxa_ z5UXf`f6ypB!9@U#4{Nek7Uwp_ft=J5LNeW{r>X~Xw`$gt)fZB&M0D4lWMOwg9=Lr6 
zZn@iN%lq?+8@wwhXcAmaS9jmL0aY*FwYTs>U3``HY0k{Q|l;FI%3s1BcXB}sMZ0~=DWi5z@VnjM}~;JR~O2=LTcMm zgsqND$izZuma1b0_#Y*ajdOE{ky;eNnhoxu5Mi$Ewwc<9R5>1+;#>F5DB9ZwO$ zT$A(rD`HYJg%mw#rbMmT%|Yy$>kumD2SB>NA(X|GeLj+9q0HfVS3Bv+c;Zfe{xKdj z*X+VsYikKn*eZwuqn9XSJhsWm?VqMsj9rfILL9Ybubc($cI(}J?Rm1ghMNvudq=Nv zx9{K;aFI{{dFgn6DnE{*!sfM)}Ig@^88se=Yclnrd- zXj!O&Zm3pB>|4!l*=nNebO%q%2LsU?G^AYO+`K6e0mV!R?2rY?|~XDMtC*!k;G8n<=R9^j=N9h|Bpv>aGakx=!9z*c`=k37{7uyE d_wKKx9wi^%#niF^Vijz`qQAnUzrx?> zETzWRiOFX6o1L9+zS;H?t%4kvL6#ciAPE45q05q%hJ>aj`}}$2@LN8gjQyF%Z%2Ju zQb5s_Edz|TZ9OcdqTd`Ju`iO1nmZ2sA?MSU?ap|ezkK6(i~S(cj`l!J_|A|&*m9gy zrKSHwgo(ezpn#@pnUy9mR+6mjEA$Y`(I!x5bsa71qJ0?ZSKp-_VfzClyAqD9ju;>t zU=4AAl|>2}Lp&473gU^_DUNoE1Z2(SIo-Swq!d;wRdHBZP%oeekqOAmJM*CA6o+oF zW3{{Od7;;wx?EgsMD@s`CM1Sn7g-SMAcjaU!=WHWUpUeg*|h*YvsJX)=WeIi+I7r> z*M8@euzOD_;tRw)$70TfE zD~3(2W9W1Izgc&{_~5oMp7tBPhFi!*je{Fh6V^T3-a!v z1JZAg{2n(e$P<>73DuXHhMjvqJ#s+#W5Yu6DxIEMHrkNd9Hd`1#*nr@ZSHfvm7ZKz zTaxD)D8{a9S|Hf;ZPT;yWFNEHxvjbDK5kM~0S}Hnx4U(G{n-Bv>;(~hC`tN||1Iazaym)rJl zf7)0;koNM6iY@;+>dG%7r~HEY$uBOL{9+2p zFD{ARnN4j<|f#uQmV z5Hko|h}8lofdIS+LJn6*2XA0Cx(fIrrlLbJ9}QJn&0sN-=_|#fGZ=?Ug{zPVFhjry zM9&Ta0fbK^F;O!q`ies$U@F3(63GX_b_mZwDV(n=tz=IroWfcb{nGu3lSm){d6MiQ zjUI|c*XaXpNvu~YH&6;nRSri(ax2p;hBN&NJmqw*DizS|b(F=!JdD% z4+|uOrDCS5B%}ROI1`fFM@_^mhMVUkF|s%D4u}7N2XMFfo>jv8XJSzZQY>6bkL*pn zS7#%+$roB2^v+44gJ;aqO#+r>z3 zUryz#rEt`MGq{9&yCS+E@m`=x@d){WPVAZZLX45(9A|C(psZ0^aF(rPP%4C~VqyQh z7|zv|6rO3kVcJ8!@vlvOykzyC$WK7(nem15dopkicm}o48)7ngUMl9AkhhL}Q;=_H zUNh^1?+GQ7C$WQxcxUrDeH0Qy(WaUf`ZDkk_z35mtBR>)5b-6bDa9x8+x+X`gOEME z+47fhEKcJMl~i&7bsdWvwn7uozK170A3DYGDF4#0^I38_$}d}^r%YSd>{|4fqBq8W zu~@&XM1ZTUd{vI*>T>%w56~CzQ$n3Fuh=PMM*AH^124K_%jX0q&3wi=dl&~xDj&3y z$*Z9`Hv4IcuSgZ)QY;<~f%|4p#)78*5x=16v#58Fl1xC*XmKsln*D!_^S1oyy4nBs z{8`_F^iZ{{fp?~#)SU78JI=S$m9z^()t}4V?sG}gI+r`}FQ?b&w0A<07r_@n_)!Eu Qq5l7ed%DhXC-C?AUq79ftpET3 literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/DRUG_EXPOSURE_LINKS.parquet 
b/cli/tests/resources/omop/private/DRUG_EXPOSURE_LINKS.parquet new file mode 100644 index 0000000000000000000000000000000000000000..183ce17cc8c031a9a8047c7cdc8c92e847381ac7 GIT binary patch literal 597 zcmcIiO;5r=5Z#vAkm{i^Y|}M8aIxV~5D*S}n^y1(Ban#P2q{e^Ak-pa`~_b9FCIO3 z@P|3GXkudG&6mv1&YPK?x7lq&x571E;9iRxoNY6fYblH|6)l6Np>3dX#x1_%VU5Xk zt~<6ytgTV4#R@+nBsL4wZYY_|bCbCrio;b=XSykDbCfRQLG+l+7t1snj7MCx4C_^~ z-W0d(X8$@?c&L+cm86*HP?ILSITW4p#^acumV4ZCO`~V-oiB&3EOyzT8h0JeZA#=ZL z+1FC|F03H`N?s?&pZqPkg8VbJAU_G5fi>xSqp{ofW5tM>_>#ILB@cECF%SHSZX&^tV; zpu&{zVAhv${S!Q756%kt0>-&0+ajd8jnjFv@zL#+2DA34=2^#j*VJ-TQ+pqFiaEat z`C+Q;{##CpuuffeiXzMiLzKS4wK*)FGJbLI9I;UcR*1U-b9OezhZW|(jG;v-AXPjq zHC=PZ=vobb*;wkWnY-r!JNM=8WbPuXHq@n zbq6sfib9d!F-rg^K8|o{mU0ubF_<1}wOM;u8`$MV-k=v^|G*+|DqQDH;{1J*&iGdF qVd_jaZQtLtg1`$iGbb3suRoLB+1lRPmQNHy;ve!SNk|?3NdE#I(=s{$ literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/LOCATION_BAD.parquet b/cli/tests/resources/omop/private/LOCATION_BAD.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d53d5b28076a5135c7cc5d7ca3be0072238dd330 GIT binary patch literal 1343 zcmb7E&2HL25MB&IjmoL2EnBiGE?BCfq}a4lS}BJ$KOuhk+EHeU;?!vR!~np z_7QsOu@BJa=%u$l0}s$c51m<4XW@Wgg~y&x-#6dxjB%5;bul9nQ8CGkJNJlZIN@d-F|=hE}o7&Prt%tcqXN^1sASUN+W7| zv_`2%%O1l_Ae`ecV zcj!m-6zlJfR`_@*p3nTchF^49rz#!f7jb_P5L+FFvZf@hgnl7*P#VHzhCQQk;~e?-J^$_aip?X!`9Pg`Xh@3p ygBp5BwTwN)ytAPv<|WYzgZC literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/LOCATION_LINKS.parquet b/cli/tests/resources/omop/private/LOCATION_LINKS.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f73025f888f445ecc68e19aea4855508290644fd GIT binary patch literal 904 zcmcIj%TB^j5S?Bcf{F<--lk2uU}NI~6);3JZl;Jt@dYNnHl`^J5>c?As4L@FxOC~# zPw*T36u-ck>jQkmos-^qoVlGjbKARx)r{0-LXNMIfiZxXtpR|bB|(l!Bc;+&8MRHF zQB&u<0h0p}0Ygbn>Y5f2&Vy!4VdWs2u*{KW%lEpC*6CrRCWWCJcbajpIe91cS3Qfn zf)valL59eDBN-=bl<^-V88vmzGs%8Lz#z$&NT1{|)@otU>6C-30gks3=UEWTeu-}d 
ze`9Ga=D>`0{G;H+lRAwsA-^DjdhkMtd#nDHdW0W8~$W3{@u zyYM!%c_U_6=ITgNVMS6#?4eu7WyCrbR0>@eah;O|#1*y_e^`pxbyc2l+IdYy^W+o@ zMULGbYvrKNVc6;=FM_Z` Yw;-9BolZ}ut&|Nw4`k@y-l9AB3a`(XrvLx| literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/MEASUREMENT_BAD.parquet b/cli/tests/resources/omop/private/MEASUREMENT_BAD.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9f18ea14831a13da85edb8df2ea98e7ebad3504d GIT binary patch literal 3982 zcmbtXzjNAV7*??3#5RfT#Blh^^}JYjy*9y!_-eX+V1Bva0*wV|@qr`*wgf~VuswFl zkfGNhLx*0bj+r`i=+HkPe?h0r88UUqkn8n*zfZ^l%gUL^w$Q8Zd!Fz8`5Z2iax^d> z-~zrp+_(oE$MJkP;M>^Z7B)inb+z6%4OLe=gQK<*@CCVGIQS*c@pr!pGZ+A-@xR}) z2^M>*(eHL9vCnULfM5A+aL{?W-0jF}Z_10Gd>%}H%-KwzI@^28JzdjBiGPg-}az;w_wvf>_Up`gRbFD^Jc+==EI$E zV2MSyGOTN+dQ|TpbxggX8jb}0bH{_?zq^;7*c7e-z4dCD7fUwHirHxo+=F?)=;6sP z%XTn}M!nNikF@T{5yrnu9^e;#8@$B8t#-5JK)+x1KtI1{d;f6Q?t}YXSvCzrg_fOc z{day3nwR&(0Y588$k`&jLGtISP4a*h_ZpU8 zS0`9DKCbO>p@qKOQuTUZ6U1f9H?1Y?<)uC_`$DD6`P+?qWKv#nV(UF`59D~ zzmd%HH`7&qMj_=tM>+YKq>`Ua8-st(1>wt6@a^0>2Ll)KlXCfo%6CjDL$8)2cwLYb zA*2Ly;8IjPWcJL>=3u0_Drn1L-4uqV>$~w;H;lRfSeh5+H%e z{MRf$Ir8jNc%-uNXF~=3OJCvz8_(jQP2#8*Ep1jb>(%mcO^9P0BPAX0Nn|eHeWQgj`_l!3Gh9A0rRQBnI_cyv{o%^u#mx0 zfEvK3b>(2@8DG+!4@d0^-$_H*IgzRfm=g_pC7H6~x9Y!Fuc%ru9XyhIy+?J!=#Dmr@IP+5+u4l99&bP1hA&a@W$Z@3Tx^-+itwNK|B@S=TmS$7 literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/MEASUREMENT_LINKS.parquet b/cli/tests/resources/omop/private/MEASUREMENT_LINKS.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8c2c01dd632bc96ee9b9ae7d157c652ea2a1d598 GIT binary patch literal 2309 zcmcJR&2Jk;7{+%u8^?7V$1Tj_EruFM7XxX@M}rDVp|g(NCUp|mB=*J%g0;Qaq3hWA zBTgzII3PeADh>#7sCwXl1gZcbgb;!rdf-wa@dt3I!~rfuJ#s)Q&&+zU(;B#tm4CZ4 zvperR@AJ;uPSNxvk;zSo{3a!dL4-qs@P&Bmb5Rfk2@C)k3<3`r0$y+c_`opmgAot_ zqaX+l0)!Io0rvs|6mTC10Tmnu_k%Ea06YkefCzX9JPaNIW8f&zz@s1v9s`eqC%`x$ z0>rP6;}j$w;t7XjHX4#y9*Pj{>9Gk{S!#yp895#C&za$~AB`t}nN5u?u2?S^A6M3Y 
zeeI>_rQ*hw@~5BSIqe5IOeA$i@C4ORpkkSgR^6&to6Q$DibPTj1ATH1>lQ%^2mY5_F?)&Lm&$*T|+nL48LT0~P4%bd*V{bX!5+L@oMDMkM z8}8VkMCEP|0gi05stykPbW)UugHtif8X7`eVY|h`_|jdoFvVBtXip6 zH<5bUYP9U;IctX-@QtKgm$WNG+8={(pu2{$cg4OAM95f@czOQ8S>_7cVJux{0n)+! z-gMaAPF3fka<_-!;4F7Uns_}bNP}+@KlBfG^#`DTxn8vD@!CdV-l^QzqH;~tzVm5+ z9)R#}G-Yq>zQVo4pCl3&F8i5ah*ikpUa@SnBX%XqYvA-SJRD_Z9q^wOL|M?@4Rmz| zb-P+Ers`IM?K9Wn_eJHhsC_V^?fRkg9|QMQicObNGZjNf#N5h!gZ#~tX5kj6up=?!5;=|%cr8{36XR!Z{ z>P*;MEobm9Yn8Y}TJ9S*RD;#zhg zRj8!x!i7xR%q}hDFlwbRl_wqNPPN_sd2RwT69-R%w-T%w-!89RSlY@P`c}S@X%!O- zc40cxD8@VaCS5%peJ~NlKd2UdhmU-wZJ9EEzmoBqQKAR~c_D(K4Y3Dc7 zFPm95twiT|yaKCd=Wz5pol~cTP^4zoUu{{3DrW7>YrR?HxHdT+%=%Je zZ1C*WmJP%|*Zgjm8DcUWcLps3#OrG|5dU7+ow}=AVd>1Dc69mHsl)8d2A!#OJpQ_2 zW65_nb%?w3&IB=rvOF7)8_1be#+*5AV19DxG4UF+G#Spu2*i9e1n0k-Hdw#k(oOov z0?RT?Kiskbe7&t3cZY@P0;^tbuYma8?1VY*)XJe<$(JA3bL%fDB9<=5}0{5lPkU$4UpNCa zA`k#5N)wNQ3QBIvnW7{-niD@xOzxCGOD2xie*VGnn!iWkytK| z;?YC8p$&0>IIZyj*9)9Lid}L@Q#ydh%fmo`mhl8iGbmcnlns%lVo2)ug5`tkyk1Us ztHp>=DLo5S$&2*z9_qIO%|L(z@lqOSor+HrfI2-SA-$}Y(?X?O7)e67DGQO1+|TyP zLUu0UTRti@Yb6#M4G-We(iDdpzF43&Oz6SvrwKrX9v9*tRNK-~u}3lxjbRe*ooINf zy(e^Jz(n1u1BJ5n&u zeHI81Gx8~xY*)7q98!ItV5DEb(9hU`M6Y$Ym$^ejFTqN zXW#)42M#<54}dsvMu-DP9smi689OL#skoqyGTzIRQw2Ge%(dny zVg3*dD(VHIBy}w@=}*02?#xD$=ZB*sDl(OQ5!iRZ^v(Fcl=V$OCsah5Bn*Hh!z5*6 z7)ps7m^qx6Xfy_6>i@lG6f7iJ@d_F5-s&44Tt9qAL z+WP4W(AIgg*=XO3nyY;kZ=o%?#W{Zfj<(M^1O~jq`GA)OVa$R9 zb)1zr-@_5se@0m0Rw=w6iQuzTN9pkB5S}OohKZKez_C@Wj9U5H(_W`majWCwhi$mK zdkQ!2;IPB z4y3!>T10h$LWGWp5$`0F#z$l!Z$DQ%1=d`*)Aakf!V?F1p|tyY?g`?vbRmJ>o$O0jJz zE{r;3%bV(cZ!~PXme(7Z?Vc$_MJ6W1mok$dy-Ebp7$!*c!g zlU~KvNRpFf@$IqEwFbKI1f@?!?@&5e`=?$o-z-RKr|M8Oh`Q;dtNZRa0uz~(kk0P= zLCF901;`IBx%|Ky%l9v;_$?yhzd^i=JY%R?dKEPonJx!sE#pkrpQ~ErQ+ONa-m6jl>oVje}Au4n^icYbrheq?xoqCYQyuMgxo4CAw4QV)*&S;@c*!~d_ zyx|JcLem6z@0ufp-2$UxTQVN4qBINF^LRN-qI4clhVzLOrZ8VM^G)*_Ug@vSc8#ck z6wIbVhG^+_tp%2`8qcDqkYB`>_+<{5!gg%upBeEc!Yk1>n*g?Bl`#lz`-cTV>r>lh zX(3BbX~av!9Q`StAWpG8RjBVHUbELmJZDSsx234V&>buC$WFQbjQf!EIJuBcTV)=r 
zNV&@Mxzx`dmN$#>axj~mF0#?$ARpXcB%{6HG3+a9G%x4*)VCnTMT&%Kf=6JvNtiyr lMC+Y#lI(J8g*=u9891} zRb5nBR8`b<)m4;Le?Wgge?u2tbk$!FWzmHfJ?G8>n)R8^ReVp%p=bY~xh9phz za?2dyT*q)DpAbTLKES#BJ7mT0TkEL$MY~n3DV)noyaDe6p75)m1xyRrJ8Am!MCygcWd_7|t0Ral)u^^=a?Kb_$NBs244j|VMmYDh zwk#R7N#VSorJUF9k-0~fbhUU-UWjrA?OOTL`_~aDe!)Ki((4l?%D5 zHYM%>RQQg|R?z17W|0!k4f~vhXUxRfQ?+R>+2v-=I21%U51>vH#SS?RAS<@+OcN8$a2;9Qnnu(7W@ap7FYyQb z*Z1dXs{A5dl%Kn8^8ZtxmiL&N>lKU*9%JfRIJb#r4<^fusKnr z^aP(Glpf(HjEA2vdWJJWFrx|Fc2V-`Bsl(uc_Ner zA)4G11cXl=G5?O4@z7PEg5$WNm6~Zy*2XybP`C?teI*v|6qpS?K|J>O72d^^HNK7C zOWaimWx9oUW{}%E?J2ngat-CPm%D`&?F&6YsE+uZGg1jO8l{5L1LS@vYI5teSF-UN z`dLUm6@;htupnR#4$L4zv~j@h?1ms>J)cWxz@wwYGi51j#y}sQPY+n8pAE-WybhWD zI9`xdh}UH=ABqK~Y`B{XX$^W7wa%v^6Gi0ZLnj{wJ?0}Ko~SADYFEkvpF^!YCEgW` z4?EtBcf%Tsw8~}~s7(({I+h;lrOqGtRU)HuI1h&1;hegh{)9{mG9^RSNB z5xpGoG3)b&{tq@mH{afpjrBrt|nPu8&E+7)r5U$R(~oH^bV1 z(Dacn;4rBnfT?f#{5kP$cO?Dmu4+7zJDo?8uDAQ!J^0-Q@88?Idyj*UgYfYbd Nk9_GSBniI^{{`9oHFE#} literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/PERSON_LINKS.parquet b/cli/tests/resources/omop/private/PERSON_LINKS.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bef07a1a24948de388872a05c7ea6a1cd68ebf1c GIT binary patch literal 1953 zcmcIly;B-d5Px@^ClHcR$9HqgTtT5X7RK-)h?#LF?~o4_6;b>k&Ny&ffC&OQ{0J#i zr?AST%s8o1rm#w#Dy2pKloTnldx)HfnH2hlxA%Vg_U-=O?jDz6*#H%&OqDbhDH$Wg zGtCh~c$f~Dl+bZ1Ps82_Oi_^q)XDpbk0d%IrW9={FgiLH1HynM+zhG04{wzUxIEQ<^k^A;)z;WKpW)QL!h|9 z^S#jHIhrXPRgL=5;;~sM){GUSZR309P=E2jxaX?R@j%8Yn(k(H;)4@{2ZEv5aAYp( zg7tiKE)t#%1p~tX!v3HiVE;`bWXg4ICp){x`>$~}tWeKYy4@MGdRQ>qt7hHe|C5t0 zIrWREejifqayO7Xq(2ROuq`mdPv}^$6Cxrghh?kcgs5jOdm3v6-B^FMX_zNf{W^_X zQeM4usuxZL&JK(>LdVlsb%^IwJmCLvC-4}RdVysDwX%Tw5xXFLrL#+IEbaU7ACUT(ksScfx00@lf13LJNR}Z5$XXu zTV*CmTEW}l@zG3w!a8~zYa)qW#k>Ciua)|cZ-9FLeR&V9_Asp>{TY%()hV5e z*~7Bg^_}OPPLY2*p5X{FjWM0uD_c4Z3|4U!V{8eZfV)pXIaFC5 zTKG(zqKu+I$1Ju5K@i4cam1RnZ!Hlr)_|#L`DC`8-(7pR8QczoB8Sv)mu;fZ_O7tT zDu~%@6qbKnFqTD;OTMk_1-CQ#+FDJo=bNxj=XcRm+qmg3*7dJx)=9W5TA}C+$OHIG 
zEjNohiMEjow2NQ?`@kDhwHW_DJ_&zeXZyGm&j$3vM75ag@`*)n@&TvX2Ie=(n9de4 zcqbUHAmox&pXCm)B#IbnQyScL?W9MvD&En0-+iIjNiKt{{{9RsR)E*;N$!kvKUu2 literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/PROCEDURE_OCCURRENCE_BAD.parquet b/cli/tests/resources/omop/private/PROCEDURE_OCCURRENCE_BAD.parquet new file mode 100644 index 0000000000000000000000000000000000000000..277b5c25538944be1d44d4b7aa4edee99e5ca035 GIT binary patch literal 3665 zcmbtXJ8#=o6lP?{iB-i#5>u5>q3|LCL7M2{q>Tz>xYXOV9Z8M#;9^K5Wy_{ShN7g% zW2X+8ilRfN4jDQZ9x`|6*db$ogVCu&hYUUE@*z@@iCQ6~%lkUt`OXtkh162)D$B5e zT^N`<48w3-j15HN%vv<^nWn3vERA&eP!+|IuFC^aKI}`w z6-+c8a>t8c^!jd;Usrul-d%TiL}w_*RFe-E8g+i%2kc_a0drB32h!o3VSldqp!|Eo zF)TczWTVU(_ty;{q*v<>pSyik>UDdvzSyk4{SxMNFvbQ=Pv3koY7dN_F?B`h{kji@ zA3vE<825%fwLol6n+2cVH!)Y!C=InP%#{ zyC8WOHUt-4s@#5$GrKX{7{5T9Ad6#XVT74zEPAng1Lm&Oak*t%EO+&u$}QJKxsU3a z+;VM_TfQ%HOEyC8DxHs8vej|R_crb$n;Exs_X_{85{6$H;WsOr3{*!XL~6BXjc?3e z7I_;?;B%kk2q9JY;`1#b6T%;n0wG5vdqnqmo{&A9WeF+c8`kz4R*!H;1u@u!Yx@(p zmK8yqJg3*Hu+EiHLzm=XIstJwfb|q6q>M^%%Nn2Mo+J`aV3jYxZGfUA=oFWPKfuB0 zq-&bcR}@jnoOQPA(^k6PmHLID)WGv>9!)D&wi5~9MXF_L-!4pNrV>)bvl2h;r1uq} zRG$j<8CENu83E=*Dr7VP>`!zP2`c^_j=X5CdA>$>hjf=t>CnJYEbWW|<5X(yD9!Et zF}zPeN4nK`nG%2zSPga|wQS4ABga(!6i3r5bp0Ghsjr;1o3$?RJprt#(0Gh^zz^*8 z@XH-IcrU5Eh@%US#!qb4k2`~!CY7i<(j9+o`s;|~6Ju&G@5iUp$egwtf+jR~x?X?f zCSE=~vGKB|ed-*a5B-&{kAyN=ONo8RyOJ`2IGl9S5RWFG zY^D{e->6vrBzc;|yv@I=F3d-I%bSlL`=OAaz*1@JRSwT{_y4k;_y(@?)u5?S!+AL=JRt0 zrNm-=DncFF{v6u=v-}D6L9tW5B{Lbo8BUVtO}^AK?)af9FW(RA2^-f0oeQ|sUZlg* zq9IS8GLbE5a)u7C6&px@ap8!Q#}#4ML%%2#BF((}8e>A}>P`+n?f3 zaOtwY!L=*Ll`F@E&o8YKF;gA-}w?!VwEWM(=c5wP=%63LPA>-Aw-7h zgXxD!30@SCdq2>JgO-(3@Ey&U%;SqXZkHL*X5(p;hz+Krsu zeHyb9m^zx(vyA8jK%Ze>&|ePwgdr}&0|2GPrH`kESaGpjTqbTW)p2T-sN1t&G-Ab0 zbARX9sUG2q2^XgF7U zTJ_vRPt~f2s_NfRReP%XC#-ttc@I5TRn?hi@M2=DCb7U~-kbT&%kBf$Nbp0B;dVZXSXQ9FZvN_yZU4Zw zAIK}#FYb7lfA6!+{hBh>C*zL8_-mhs@ppG^;}2}#(qt?tn&E``*9+#yEgL3e!x)#E z(s9Llx#bbkulIl|i_Se83+Xg~*cDP<-SYtXeLKSLS|B^mO>t}tjLQ||&dtk5q 
z9)bO{8({+$FRV?jJ~5|~(zy^-bCuvfc0GW;3D`h;l%x$He?9O3`Szg=^4Lk!2DHBe z4z%#wG!xp^r88wDvX7xBqrSBkj0VaowjA!PHkY;Xu3s$Ktp%)|ja3cZ4A55#HTtOh zaxd};_y+I0pkkMy1-M>W3pY*{-ZhW4z4`nZHUK`{k2K1ZBNzp0Ts^DRU5`Ups0EpD zB>d*O6OX&*h~us`)3_TBG43`~i@VX7;;u87xEl;2?lzN$yVa=SZaiDK8x0fgcGDyH z7QUUio#2bRj~FQGP=M6yUkYDX!zA>JKZe(*{Tv~r25-EM2$>PSPYQ(eNw!bdc%G1B z{FEi+5T76~pCH9?Ndhx6$K_HD)c8{{CpnbMGf>YSqK6Zb2h|$NpQBL^Z+Yq(H^@;n z6^j85sh!frGA`Po5mLl>iUqUPtY35mb=XW*G`ZO5Ny$t?8dcP0s(scyXsCdHimA2N z@JefV)b&Bb6q|Jy3_>oe0FoYNVlfOiU!Xx|F+iFKMZ;7f-?>z53`HTKcGb+hn`&IP zl8v4`D(JF+`>0<--Uz9jEb+4Lc;}$RpNq|k+JZOa7>9flQX(_#Cey=qlaDu(Dk_n~ z>Sq>TBu`JOxRh&$?);&V3xBO-#)Nohav`I&Q3w3TE@CnI@fnq+h0K#Px=Q`ge5gpO z_>hIP1{lsbOY@H{{s>QtrgXB9^EsM7zs4sP4&lF&g@P#;)wn`4$z~V+v*g&eKXT^} zR~_eZ4*aQrWwa?)AL^16X?N&^lH>j$J|zB;O{t}ND6z37(xA5t>n9#l^PM@2K@kG+JC*pKEneAV| f|AWC~yq`!tjX#Zt-@xyJ|MOzb{R~rue?I>KoRJ16 literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/SPECIMEN_LINKS.parquet b/cli/tests/resources/omop/private/SPECIMEN_LINKS.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bf6850239f9d58d95a7688f6a36a8e60ea398d4d GIT binary patch literal 928 zcmcIj%Syvg5S`qHT2Z8kHzbfnHwG8=QB+XT#iYddqZYL2Mq(Ojwbhua0hg}*0GBRZ z`U!r6pW+udbEC8tap#a^?wq-E=FCk}!D>nBGB3xr$-yW<&ei}x&@DlZN~5UKS(&v> zol#Tgya5wE5dnh*PU@PL5zdQdOJU7JHgB0johS^N?eOxXS(n1ljVH}`)|>?=_ZOeV zT|o-gkS9lC`a}*8_Pa>Vq*7UAHzK%K_yFBqgUrc#(5W{v`f=R z`kPW~c^=GcCpZh+zSL=qaq;6EQlzgb7`kO!#eX?G=ii1kUg@1WIpa-?0a&&Xki? 
zsfbK1)e7gxxjRR-%}UU&wt|bp#INmc9us<3q0)qRd8#uN9k344v(CE6_PKd#-~6g| z4}XW!?tQR!c<$Fqt<%UYE$VomU;l}eEioXqltoOfzIm)q`y@o+lwng8=J!2ITs7t`Cm zn0%Fbvl9UE?XCwAHnq`sX!y(g@1p?Yo3LjbQRI=B(w0O_ZQhxD#ZW~eCB5ke|U(R~;3!NmW_aIu}S($xks zAH%zK5ZZW^*6q%($3f&&r;9}h>vg|)B2dsAA3=~+jUI5P335TZi{f+F$@Q|TY(jIP z*mGa)?WN7-wD&hu$lhF+bS}!p6Ynu1II3PLgX*-AuD0b`1w_{#3XEyh zojZ6Kj1R9!r^;fNFw|l;E!WRX{Jd<|rLJ82l3_yO^K3Htj0{DL2H?b<7jlerj!4=0 zpaF=N^s*4M->gaI#XxPV1N~aAOYLTA7g#d zWD_4>JtSQ%70rtx<1}{s128_hF-Z%NL{XyBiY#@@^<}9nNnOOj__Bw};7ljo;bBZg zg;azO%lAB#^7Th^VIc537 zH3x5nFG*HUnneQGk5_a9znP(Swe#_qJJ?Rr5HhRJNXHF|#;u$-6GNJBQNn zh{4+8=p5`$xmYD{idm%Av;L5QgOEc0U!EMvk9QA%1ngZ5!^h~#c-h~rV~w*E34soT zTf`c@6eox&`ppVG5Aj|veZ(7`ssGK?jrHhR2dXHint!cz2!TE}82ino5p7B{zo6uu zsM$jvPcC0yX5mUjUrQAQ+K+rmuQqAQXanNDS( literal 0 HcmV?d00001 diff --git a/cli/tests/resources/omop/private/VISIT_OCCURRENCE_BAD.parquet b/cli/tests/resources/omop/private/VISIT_OCCURRENCE_BAD.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0e2e7bc046eaf72666d4e7e14017729a6ceab6f4 GIT binary patch literal 3429 zcmbtXO>g365cZO6HneQjZUcf9s&c_n)$U@9(p@U`@R|TgmL=deeE5Ql%~xVDH3pO% ztE#7-dfZdhUr_Z>?XlV)up@{xtp9;)k1{@C_Gcyrfb^1zL-#^cYsJ}Cd}I6T4@ig`I|H$8*0llYf# z2Fe=mToe2%fQOQ=nS1Nhf?i6`n!D6wExl79OgIGOd~%*(it&XHQFj8C^M4V{hKz*v&c`yKxI+Z`8Bcn>H!-HeHFmT^nL=(|6e0 zHJZpA{-m?Jk)O8q7!Y|hL`3nl_O;b_qVr%9pRc-ygpdmS;`5A<84k4!938?2 z49n?c5^zZ6j4l>&(V+%-$fqEV6uzW4>L){~CU(?9rlatglrku3^|aJ$9aS}CimrX< z;2nG8A-o4(gU?JFe08SgGb0Jl5FwZBB$IUbEq-Pz_FND|x{9pP(ZwUWgIZDGmgh^xTy_&t)lTfx1qIc!&@* z6J}EVymP58a0_l~`H7_$ zlB5327W0ZuFFiq!75>M-u2q1B~iK%4aSb;Hz5DObIM{We^Rz)3x$}nkG551Y~lk zCzuIYi{yIzPGQgCE7cAlH)2mMKz@%5u%z)Lrwe?W9}W5nKPWmlZ}Ur+IM6uoM4T!o z*HWNse7Xb*wm|FCTJYeTJznFx9$y-tMSPXC*i(j}(L$cXI6(bFsvG#WpF_ugTR&7A zEBSKaUDsolv-#NhN6}ZXpMW0<`a_fNh^XRSuQ|-eB0hZPyaBbO460bGRA+z(A?$`rAho4U6{$txunwgyP21p?C#9Y{P*W^(`t!|s-k`p)X##kV<`U<)|Y^iP!VBbumIHr^?Sg*0(%Sg4(uj1 zW@r-TAa&?XQh$^}>gZP8ZFK5w&)@UAEJc3}1_OYc zr|NiXF{(*8qhRO(@GKldCzdcX*!n{BG6tMxa-M`av-M%o*S205nvZ 
z(47XVXH!IzTW-a>bf^q%zb8(8f5FK=JqkiMk%+43aXmfLzl1TPH#gI6tJe`#F%^PZ=w-Oqs__NZ7|+hOu2-!5^*Q z;ZcUyTX@K&GW#eEj>#%K_!4KlXgGN4i|yBylVl-h*A8lK-EP7@YZo!q+!)p~mdA=G zyAyhxY)p^yS;wi1v-Lu!v|Z@ulZ6z&D|-Z`8@M$`(C~1uJz-`qc&@GR`A57hUcd$T zbcA#1vi8O)TIXlUgNst}S>J~H?80W#=6XHs&kpu&ID-2E=P}U|O~-%L@!HF7v$^c} hexsM@dVZ_gs3+FeS5{Y6^>mz&;71<7rz8g-(7)JZDp>#k literal 0 HcmV?d00001 From faf50dc0c25bc8921a8c46cd0ce49b3cc673b615 Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Mon, 18 Dec 2023 14:19:57 +0000 Subject: [PATCH 06/28] Add dependency for reading parquet files --- cli/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/cli/pyproject.toml b/cli/pyproject.toml index 9f5764967..fe40e5de7 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "click==8.1.3", "coloredlogs==15.0.1", "pandas==1.5.1", + "pyarrow==14.0.1", "PyYAML==6.0" ] From 9129be3e0b3c8b90eb6bd3ba61422b0b0cb62007 Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Mon, 18 Dec 2023 16:17:30 +0000 Subject: [PATCH 07/28] Add procedure occurrence id column to message. Use pandas merge instead of join. 
--- cli/src/pixl_cli/main.py | 15 +++++++-------- cli/tests/test_messages_from_parquet.py | 18 ++++++++++++++++++ pixl_core/src/core/patient_queue/utils.py | 6 +++++- 3 files changed, 30 insertions(+), 9 deletions(-) create mode 100644 cli/tests/test_messages_from_parquet.py diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 0be7cb635..45915b9c9 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -395,16 +395,15 @@ def messages_from_parquet(dir_path: Path) -> Messages: # study_date is in procedure.procdure_date procedure = pd.read_parquet(public_dir / "PROCEDURE_OCCURRENCE.parquet") # joining data together - people_procedures = people.join(procedure, on="person_id", lsuffix="_people") - joined = people_procedures.join( - accessions, on="procedure_occurrence_id", rsuffix="_links" - ) + people_procedures = people.merge(procedure, on="person_id") + cohort_data = people_procedures.merge(accessions, on="procedure_occurrence_id") expected_col_names = [ "PrimaryMrn", "AccessionNumber", "person_id", "procedure_date", + "procedure_occurrence_id", ] logger.debug( f"Extracting messages from {dir_path}. 
Expecting columns to include " @@ -412,24 +411,24 @@ def messages_from_parquet(dir_path: Path) -> Messages: ) # First line is column names - messages_df = joined messages = Messages() for col in expected_col_names: - if col not in list(messages_df.columns): + if col not in list(cohort_data.columns): msg = ( f"csv file expected to have at least {expected_col_names} as " f"column names" ) raise ValueError(msg) - mrn_col_name, acc_num_col_name, _, dt_col_name = expected_col_names - for _, row in messages_df.iterrows(): + mrn_col_name, acc_num_col_name, _, dt_col_name, procedure_occurrence_id = expected_col_names + for _, row in cohort_data.iterrows(): messages.append( serialise( mrn=row[mrn_col_name], accession_number=row[acc_num_col_name], study_datetime=row[dt_col_name], + procedure_occurrence_id=row[procedure_occurrence_id], ) ) diff --git a/cli/tests/test_messages_from_parquet.py b/cli/tests/test_messages_from_parquet.py new file mode 100644 index 000000000..1fa4eddc3 --- /dev/null +++ b/cli/tests/test_messages_from_parquet.py @@ -0,0 +1,18 @@ +from pixl_cli.main import messages_from_parquet + + +def test_messages_from_parquet(resources): + omop_parquet_dir = resources / "omop" + messages = messages_from_parquet(omop_parquet_dir) + expected_messages = [ + b'{"mrn": "12345678", "accession_number": "12345678", "study_datetime": "2021-' + b'07-01", "procedure_occurrence_id": 1}', + b'{"mrn": "12345678", "accession_number": "ABC1234567", "study_datetime": "202' + b'1-07-01", "procedure_occurrence_id": 2}', + b'{"mrn": "987654321", "accession_number": "ABC1234560", "study_datetime": "20' + b'20-05-01", "procedure_occurrence_id": 3}', + b'{"mrn": "5020765", "accession_number": "MIG0234560", "study_datetime": "2015' + b'-05-01", "procedure_occurrence_id": 4}', + ] + + assert messages == expected_messages diff --git a/pixl_core/src/core/patient_queue/utils.py b/pixl_core/src/core/patient_queue/utils.py index df9fff116..1d54645c2 100644 --- 
a/pixl_core/src/core/patient_queue/utils.py +++ b/pixl_core/src/core/patient_queue/utils.py @@ -30,25 +30,29 @@ def deserialise(message_body: bytes) -> dict: return data -def serialise(mrn: str, accession_number: str, study_datetime: datetime) -> bytes: +def serialise(mrn: str, accession_number: str, study_datetime: datetime, procedure_occurrence_id: str) -> bytes: """ Returns serialised message from patient id, accession number and date of study. :param mrn: patient identifier :param accession_number: accession number :param study_datetime: date and time of the study + :param procedure_occurrence_id: the OMOP ID of the procedure :returns: JSON formatted message """ logger.debug( "Serialising message with patient id %s, " "accession number: %s and timestamp %s", + "procedure_occurrence_id %s", mrn, accession_number, study_datetime, + procedure_occurrence_id, ) return json.dumps( { "mrn": mrn, "accession_number": accession_number, "study_datetime": study_datetime.isoformat(), + "procedure_occurrence_id": procedure_occurrence_id, } ).encode("utf-8") From 273e38e74b5396b7abc0ee0ca46ee7399dd6474e Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Mon, 18 Dec 2023 16:32:55 +0000 Subject: [PATCH 08/28] Remove option for CSV cohort file --- cli/src/pixl_cli/main.py | 65 ++----------------------- cli/tests/test_messages_from_parquet.py | 16 +++--- cli/tests/test_queue_start_and_stop.py | 47 ------------------ 3 files changed, 12 insertions(+), 116 deletions(-) delete mode 100644 cli/tests/test_queue_start_and_stop.py diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 45915b9c9..a6aaf1d54 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -72,23 +72,15 @@ def cli(*, debug: bool) -> None: default=True, help="Restart from a saved state. 
Otherwise will use the given input file(s)", ) -@click.option( - "--csv-file", - type=click.Path(path_type=Path, exists=True, dir_okay=False), - help="Give a csv file as input", -) @click.option( "--parquet-dir", + required=True, type=click.Path(path_type=Path, exists=True, file_okay=False), help="Give a directory containing parquet input files", ) -def populate(queues: str, *, restart: bool, csv_file: Path, parquet_dir: Path) -> None: - """Populate a (set of) queue(s) from a csv file or a parquet directory""" - if (csv_file is None) == (parquet_dir is None): - err_str = "must specify --parquet-dir or --csv-file, but not both" - raise ValueError(err_str) - inp_source = parquet_dir if parquet_dir is not None else csv_file - logger.info(f"Populating queue(s) {queues} from {inp_source}") +def populate(queues: str, *, restart: bool, parquet_dir: Path) -> None: + """Populate a (set of) queue(s) from a parquet file directory""" + logger.info(f"Populating queue(s) {queues} from {parquet_dir}") for queue in queues.split(","): with PixlProducer(queue_name=queue, **config["rabbitmq"]) as producer: state_filepath = state_filepath_for_queue(queue) @@ -96,8 +88,6 @@ def populate(queues: str, *, restart: bool, csv_file: Path, parquet_dir: Path) - logger.info(f"Extracting messages from state: {state_filepath}") inform_user_that_queue_will_be_populated_from(state_filepath) messages = Messages.from_state_file(state_filepath) - elif csv_file is not None: - messages = messages_from_csv(csv_file) elif parquet_dir is not None: messages = messages_from_parquet(parquet_dir) @@ -327,53 +317,6 @@ def from_state_file(cls, filepath: Path) -> "Messages": ) -def messages_from_csv(filepath: Path) -> Messages: - """ - Reads patient information from CSV and transforms that into messages. 
- :param filepath: Path for CSV file to be read - """ - expected_col_names = [ - "VAL_ID", - "ACCESSION_NUMBER", - "STUDY_INSTANCE_UID", - "STUDY_DATE", - ] - logger.debug( - f"Extracting messages from {filepath}. Expecting columns to include " - f"{expected_col_names}" - ) - - # First line is column names - messages_df = pd.read_csv(filepath, header=0, dtype=str) - messages = Messages() - - if list(messages_df.columns)[:4] != expected_col_names: - msg = ( - f"csv file expected to have at least {expected_col_names} as " - f"column names" - ) - raise ValueError(msg) - - mrn_col_name, acc_num_col_name, _, dt_col_name = expected_col_names - for _, row in messages_df.iterrows(): - messages.append( - serialise( - mrn=row[mrn_col_name], - accession_number=row[acc_num_col_name], - study_datetime=datetime.datetime.strptime( - row[dt_col_name], "%d/%m/%Y %H:%M" - ).replace(tzinfo=datetime.timezone.utc), - ) - ) - - if len(messages) == 0: - msg = f"Failed to find any messages in {filepath}" - raise ValueError(msg) - - logger.debug(f"Created {len(messages)} messages from {filepath}") - return messages - - def messages_from_parquet(dir_path: Path) -> Messages: """ Reads patient information from parquet files within directory structure diff --git a/cli/tests/test_messages_from_parquet.py b/cli/tests/test_messages_from_parquet.py index 1fa4eddc3..983601234 100644 --- a/cli/tests/test_messages_from_parquet.py +++ b/cli/tests/test_messages_from_parquet.py @@ -5,14 +5,14 @@ def test_messages_from_parquet(resources): omop_parquet_dir = resources / "omop" messages = messages_from_parquet(omop_parquet_dir) expected_messages = [ - b'{"mrn": "12345678", "accession_number": "12345678", "study_datetime": "2021-' - b'07-01", "procedure_occurrence_id": 1}', - b'{"mrn": "12345678", "accession_number": "ABC1234567", "study_datetime": "202' - b'1-07-01", "procedure_occurrence_id": 2}', - b'{"mrn": "987654321", "accession_number": "ABC1234560", "study_datetime": "20' - b'20-05-01", 
"procedure_occurrence_id": 3}', - b'{"mrn": "5020765", "accession_number": "MIG0234560", "study_datetime": "2015' - b'-05-01", "procedure_occurrence_id": 4}', + b'{"mrn": "12345678", "accession_number": "12345678", "study_datetime": "2021-07-01", ' + b'"procedure_occurrence_id": 1}', + b'{"mrn": "12345678", "accession_number": "ABC1234567", "study_datetime": "2021-07-01", ' + b'"procedure_occurrence_id": 2}', + b'{"mrn": "987654321", "accession_number": "ABC1234560", "study_datetime": "2020-05-01", ' + b'"procedure_occurrence_id": 3}', + b'{"mrn": "5020765", "accession_number": "MIG0234560", "study_datetime": "2015-05-01", ' + b'"procedure_occurrence_id": 4}', ] assert messages == expected_messages diff --git a/cli/tests/test_queue_start_and_stop.py b/cli/tests/test_queue_start_and_stop.py deleted file mode 100644 index 21736f5e6..000000000 --- a/cli/tests/test_queue_start_and_stop.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Patient queue tests""" - -from pathlib import Path - -from click.testing import CliRunner -from pixl_cli.main import populate, queue_is_up, stop - - -def test_populate_queue(queue_name: str = "test_populate") -> None: - """Checks that patient queue can be populated without error.""" - runner = CliRunner() - result = runner.invoke( - populate, args=["--queues", queue_name, "--csv-file", "test.csv"] - ) - assert result.exit_code == 0 - - -def test_down_queue(queue_name: str = "test_down") -> None: - """ - Checks that after the queue has been sent a stop signal, - the queue has been emptied. - """ - runner = CliRunner() - _ = runner.invoke(populate, args=["--queues", queue_name, "--csv-file", "test.csv"]) - _ = runner.invoke(stop, args=["--queues", queue_name]) - - state_path = Path(f"{queue_name}.state") - assert state_path.exists() - Path.unlink(state_path) - - -def test_queue_is_up() -> None: - """Checks whether status of queue can be asserted correctly.""" - assert queue_is_up() From 2245c962ac86727b8dd868fb9b96ba70f9a44f2f Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Mon, 18 Dec 2023 17:13:29 +0000 Subject: [PATCH 09/28] formatting changes --- cli/src/pixl_cli/main.py | 5 +---- cli/tests/test_queue_start_and_stop_parquet.py | 4 +--- pixl_core/src/core/patient_queue/utils.py | 7 ++++--- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 65d1ae12f..610f88bcf 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -348,10 +348,7 @@ def messages_from_parquet(dir_path: Path) -> Messages: for col in expected_col_names: if col not in list(cohort_data.columns): - msg = ( - f"csv file expected to have at least {expected_col_names} as " - f"column names" - ) + msg = f"csv file expected to have at least {expected_col_names} as " f"column names" raise ValueError(msg) mrn_col_name, acc_num_col_name, _, dt_col_name, procedure_occurrence_id = expected_col_names diff --git 
a/cli/tests/test_queue_start_and_stop_parquet.py b/cli/tests/test_queue_start_and_stop_parquet.py index 42e377674..4e0c5780a 100644 --- a/cli/tests/test_queue_start_and_stop_parquet.py +++ b/cli/tests/test_queue_start_and_stop_parquet.py @@ -36,9 +36,7 @@ def test_down_queue_parquet(resources, queue_name: str = "test_down") -> None: """ omop_parquet_dir = resources / "omop" runner = CliRunner() - _ = runner.invoke( - populate, args=["--queues", queue_name, "--parquet-dir", omop_parquet_dir] - ) + _ = runner.invoke(populate, args=["--queues", queue_name, "--parquet-dir", omop_parquet_dir]) _ = runner.invoke(stop, args=["--queues", queue_name]) state_path = Path(f"{queue_name}.state") diff --git a/pixl_core/src/core/patient_queue/utils.py b/pixl_core/src/core/patient_queue/utils.py index 1d54645c2..1de14c059 100644 --- a/pixl_core/src/core/patient_queue/utils.py +++ b/pixl_core/src/core/patient_queue/utils.py @@ -30,7 +30,9 @@ def deserialise(message_body: bytes) -> dict: return data -def serialise(mrn: str, accession_number: str, study_datetime: datetime, procedure_occurrence_id: str) -> bytes: +def serialise( + mrn: str, accession_number: str, study_datetime: datetime, procedure_occurrence_id: str +) -> bytes: """ Returns serialised message from patient id, accession number and date of study. 
:param mrn: patient identifier @@ -40,8 +42,7 @@ def serialise(mrn: str, accession_number: str, study_datetime: datetime, procedu :returns: JSON formatted message """ logger.debug( - "Serialising message with patient id %s, " - "accession number: %s and timestamp %s", + "Serialising message with patient id %s, " "accession number: %s and timestamp %s", "procedure_occurrence_id %s", mrn, accession_number, From 48efeea29d9804a16ee207afe5635591c75bad00 Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Mon, 18 Dec 2023 17:20:54 +0000 Subject: [PATCH 10/28] Linting fixes --- cli/tests/test_messages_from_parquet.py | 18 ++++++++++++++++++ pixl_core/src/core/patient_queue/utils.py | 3 ++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/cli/tests/test_messages_from_parquet.py b/cli/tests/test_messages_from_parquet.py index 983601234..49d3062b7 100644 --- a/cli/tests/test_messages_from_parquet.py +++ b/cli/tests/test_messages_from_parquet.py @@ -1,7 +1,25 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for reading cohorts from parquet files.""" from pixl_cli.main import messages_from_parquet def test_messages_from_parquet(resources): + """ + Test that the messages are as expected, given the test parquet files. + The test data doesn't have any "difficult" cases in it, eg. people without procedures. 
+ """ omop_parquet_dir = resources / "omop" messages = messages_from_parquet(omop_parquet_dir) expected_messages = [ diff --git a/pixl_core/src/core/patient_queue/utils.py b/pixl_core/src/core/patient_queue/utils.py index 1de14c059..e22c0a408 100644 --- a/pixl_core/src/core/patient_queue/utils.py +++ b/pixl_core/src/core/patient_queue/utils.py @@ -42,7 +42,8 @@ def serialise( :returns: JSON formatted message """ logger.debug( - "Serialising message with patient id %s, " "accession number: %s and timestamp %s", + "Serialising message with patient id %s, " + "accession number: %s and timestamp %s " "procedure_occurrence_id %s", mrn, accession_number, From 48db87a0770a52238e811976b8e2936f98383a02 Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Mon, 18 Dec 2023 18:01:37 +0000 Subject: [PATCH 11/28] Fix other uses of modified method --- pixl_core/src/core/patient_queue/utils.py | 2 +- pixl_core/tests/patient_queue/test_utils.py | 6 ++++-- pixl_ehr/tests/test_processing.py | 2 ++ pixl_pacs/tests/test_processing.py | 1 + 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pixl_core/src/core/patient_queue/utils.py b/pixl_core/src/core/patient_queue/utils.py index e22c0a408..b38c07252 100644 --- a/pixl_core/src/core/patient_queue/utils.py +++ b/pixl_core/src/core/patient_queue/utils.py @@ -34,7 +34,7 @@ def serialise( mrn: str, accession_number: str, study_datetime: datetime, procedure_occurrence_id: str ) -> bytes: """ - Returns serialised message from patient id, accession number and date of study. + Returns serialised message from the given parameters. 
:param mrn: patient identifier :param accession_number: accession number :param study_datetime: date and time of the study diff --git a/pixl_core/tests/patient_queue/test_utils.py b/pixl_core/tests/patient_queue/test_utils.py index 7e2d0c19f..9998218f9 100644 --- a/pixl_core/tests/patient_queue/test_utils.py +++ b/pixl_core/tests/patient_queue/test_utils.py @@ -25,10 +25,12 @@ def test_serialise() -> None: study_datetime=datetime.datetime.strptime("Nov 22 2022 1:33PM", "%b %d %Y %I:%M%p").replace( tzinfo=datetime.timezone.utc ), + procedure_occurrence_id="234", ) assert ( msg_body.decode() == '{"mrn": "111", "accession_number": "123", ' - '"study_datetime": "2022-11-22T13:33:00+00:00"}' + '"study_datetime": "2022-11-22T13:33:00+00:00", ' + '"procedure_occurrence_id": "234"}' ) @@ -40,5 +42,5 @@ def test_simple_deserialise() -> None: def test_deserialise_datetime() -> None: """Checks that datetimes can be correctly serialised""" timestamp = datetime.datetime.fromordinal(100012) - data = deserialise(serialise(mrn="", accession_number="", study_datetime=timestamp)) + data = deserialise(serialise(mrn="", accession_number="", study_datetime=timestamp, procedure_occurrence_id="")) assert data["study_datetime"] == timestamp diff --git a/pixl_ehr/tests/test_processing.py b/pixl_ehr/tests/test_processing.py index dff27aff1..f6ce8f3bb 100644 --- a/pixl_ehr/tests/test_processing.py +++ b/pixl_ehr/tests/test_processing.py @@ -36,6 +36,7 @@ observation_datetime = datetime.datetime.fromisoformat( "1234-01-01" ) # within hours of imaging study +procedure_occurrence_id = "123" date_of_birth = "09/08/0007" sex = "testsexvalue" ethnicity = "testethnicity" @@ -58,6 +59,7 @@ study_datetime=datetime.datetime.strptime(study_datetime_str, "%d/%m/%Y %H:%M").replace( tzinfo=datetime.timezone.utc ), + procedure_occurrence_id=procedure_occurrence_id, ) diff --git a/pixl_pacs/tests/test_processing.py b/pixl_pacs/tests/test_processing.py index 72180368c..e349fc68e 100644 --- 
a/pixl_pacs/tests/test_processing.py +++ b/pixl_pacs/tests/test_processing.py @@ -36,6 +36,7 @@ study_datetime=datetime.datetime.strptime("01/01/1234 01:23:45", "%d/%m/%Y %H:%M:%S").replace( tzinfo=datetime.timezone.utc ), + procedure_occurrence_id="234", ) From 0915cc821fed1a5c873c2dcfd4d9dbb4b8ef366e Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Mon, 18 Dec 2023 18:19:56 +0000 Subject: [PATCH 12/28] ruff fix --- pixl_core/tests/patient_queue/test_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pixl_core/tests/patient_queue/test_utils.py b/pixl_core/tests/patient_queue/test_utils.py index 9998218f9..1e327d16d 100644 --- a/pixl_core/tests/patient_queue/test_utils.py +++ b/pixl_core/tests/patient_queue/test_utils.py @@ -42,5 +42,7 @@ def test_simple_deserialise() -> None: def test_deserialise_datetime() -> None: """Checks that datetimes can be correctly serialised""" timestamp = datetime.datetime.fromordinal(100012) - data = deserialise(serialise(mrn="", accession_number="", study_datetime=timestamp, procedure_occurrence_id="")) + data = deserialise( + serialise(mrn="", accession_number="", study_datetime=timestamp, procedure_occurrence_id="") + ) assert data["study_datetime"] == timestamp From a9691a17503fa4bfc3bcb474b00c0ac7f9ae8c9a Mon Sep 17 00:00:00 2001 From: Jeremy Stein Date: Mon, 18 Dec 2023 18:35:43 +0000 Subject: [PATCH 13/28] Downstream class needs new attribute as well --- pixl_pacs/src/pixl_pacs/_processing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pixl_pacs/src/pixl_pacs/_processing.py b/pixl_pacs/src/pixl_pacs/_processing.py index c644d9d6c..ebac6d576 100644 --- a/pixl_pacs/src/pixl_pacs/_processing.py +++ b/pixl_pacs/src/pixl_pacs/_processing.py @@ -67,6 +67,7 @@ class ImagingStudy: mrn: str accession_number: str study_datetime: datetime + procedure_occurrence_id: str @classmethod def from_message(cls, message_body: bytes) -> "ImagingStudy": From 77da37a09163b775f6f654afb052c60ed9ee6470 Mon Sep 17 
00:00:00 2001 From: Milan Malfait Date: Mon, 18 Dec 2023 18:55:38 +0000 Subject: [PATCH 14/28] Add type annotations to `test_messages_from_parquet()` --- cli/tests/test_messages_from_parquet.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cli/tests/test_messages_from_parquet.py b/cli/tests/test_messages_from_parquet.py index 49d3062b7..1be9c1cab 100644 --- a/cli/tests/test_messages_from_parquet.py +++ b/cli/tests/test_messages_from_parquet.py @@ -12,10 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. """Unit tests for reading cohorts from parquet files.""" + +from pathlib import Path + from pixl_cli.main import messages_from_parquet -def test_messages_from_parquet(resources): +def test_messages_from_parquet(resources: Path) -> None: """ Test that the messages are as expected, given the test parquet files. The test data doesn't have any "difficult" cases in it, eg. people without procedures. From 3b371831152dd2f542f4cd5546f16ce2feb9c6e3 Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Mon, 18 Dec 2023 18:58:58 +0000 Subject: [PATCH 15/28] Add project name and OMOP ES timestamp to rabbitmq messages --- cli/src/pixl_cli/main.py | 21 +++++++++++++++++++-- cli/tests/test_messages_from_parquet.py | 12 ++++++++---- pixl_core/src/core/patient_queue/utils.py | 14 ++++++++++++-- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 610f88bcf..e9e3bbfc2 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -316,7 +316,9 @@ def messages_from_parquet(dir_path: Path) -> Messages: """ public_dir = dir_path / "public" private_dir = dir_path / "private" - for d in [public_dir, private_dir]: + log_dir = dir_path / "log" + + for d in [public_dir, private_dir, log_dir]: if not d.is_dir(): err_str = f"{d} must exist and be a directory" raise ValueError(err_str) @@ -351,7 +353,20 @@ def 
messages_from_parquet(dir_path: Path) -> Messages: msg = f"csv file expected to have at least {expected_col_names} as " f"column names" raise ValueError(msg) - mrn_col_name, acc_num_col_name, _, dt_col_name, procedure_occurrence_id = expected_col_names + ( + mrn_col_name, + acc_num_col_name, + _, + dt_col_name, + procedure_occurrence_id, + ) = expected_col_names + + # Get project name and OMOP ES timestamp from log file + log_file = log_dir / "extract_summary.json" + logs = json.load(log_file.open()) + project_name = logs["settings"]["cdm_source_name"] + omop_es_timestamp = datetime.datetime.fromisoformat(logs["datetime"]) + for _, row in cohort_data.iterrows(): messages.append( serialise( @@ -359,6 +374,8 @@ def messages_from_parquet(dir_path: Path) -> Messages: accession_number=row[acc_num_col_name], study_datetime=row[dt_col_name], procedure_occurrence_id=row[procedure_occurrence_id], + project_name=project_name, + omop_es_timestamp=omop_es_timestamp, ) ) diff --git a/cli/tests/test_messages_from_parquet.py b/cli/tests/test_messages_from_parquet.py index 1be9c1cab..ec081ac4f 100644 --- a/cli/tests/test_messages_from_parquet.py +++ b/cli/tests/test_messages_from_parquet.py @@ -27,13 +27,17 @@ def test_messages_from_parquet(resources: Path) -> None: messages = messages_from_parquet(omop_parquet_dir) expected_messages = [ b'{"mrn": "12345678", "accession_number": "12345678", "study_datetime": "2021-07-01", ' - b'"procedure_occurrence_id": 1}', + b'"procedure_occurrence_id": 1, "project_name": "Test Extract - UCLH OMOP CDM", ' + b'"omop_es_timestamp": "2023-12-07T14:08:58"}', b'{"mrn": "12345678", "accession_number": "ABC1234567", "study_datetime": "2021-07-01", ' - b'"procedure_occurrence_id": 2}', + b'"procedure_occurrence_id": 2, "project_name": "Test Extract - UCLH OMOP CDM", ' + b'"omop_es_timestamp": "2023-12-07T14:08:58"}', b'{"mrn": "987654321", "accession_number": "ABC1234560", "study_datetime": "2020-05-01", ' - b'"procedure_occurrence_id": 3}', + 
b'"procedure_occurrence_id": 3, "project_name": "Test Extract - UCLH OMOP CDM", ' + b'"omop_es_timestamp": "2023-12-07T14:08:58"}', b'{"mrn": "5020765", "accession_number": "MIG0234560", "study_datetime": "2015-05-01", ' - b'"procedure_occurrence_id": 4}', + b'"procedure_occurrence_id": 4, "project_name": "Test Extract - UCLH OMOP CDM", ' + b'"omop_es_timestamp": "2023-12-07T14:08:58"}', ] assert messages == expected_messages diff --git a/pixl_core/src/core/patient_queue/utils.py b/pixl_core/src/core/patient_queue/utils.py index b38c07252..3619b667c 100644 --- a/pixl_core/src/core/patient_queue/utils.py +++ b/pixl_core/src/core/patient_queue/utils.py @@ -31,7 +31,12 @@ def deserialise(message_body: bytes) -> dict: def serialise( - mrn: str, accession_number: str, study_datetime: datetime, procedure_occurrence_id: str + mrn: str, + accession_number: str, + study_datetime: datetime, + procedure_occurrence_id: str, + project_name: str, + omop_es_timestamp: datetime, ) -> bytes: """ Returns serialised message from the given parameters. 
@@ -44,11 +49,14 @@ def serialise( logger.debug( "Serialising message with patient id %s, " "accession number: %s and timestamp %s " - "procedure_occurrence_id %s", + "procedure_occurrence_id %s, ", + "project_name %s, omop_es_timestamp %s", mrn, accession_number, study_datetime, procedure_occurrence_id, + project_name, + omop_es_timestamp, ) return json.dumps( { @@ -56,5 +64,7 @@ def serialise( "accession_number": accession_number, "study_datetime": study_datetime.isoformat(), "procedure_occurrence_id": procedure_occurrence_id, + "project_name": project_name, + "omop_es_timestamp": omop_es_timestamp.isoformat(), } ).encode("utf-8") From 42b1c85c48ef0f7b1b605f8e767d654adb6abe9b Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Mon, 18 Dec 2023 19:15:47 +0000 Subject: [PATCH 16/28] Update `serialise()` callers with new arguments --- pixl_core/tests/patient_queue/test_utils.py | 17 +++++++++++++++-- pixl_ehr/tests/test_processing.py | 4 ++++ pixl_pacs/tests/test_processing.py | 2 ++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pixl_core/tests/patient_queue/test_utils.py b/pixl_core/tests/patient_queue/test_utils.py index 1e327d16d..c9b66d0db 100644 --- a/pixl_core/tests/patient_queue/test_utils.py +++ b/pixl_core/tests/patient_queue/test_utils.py @@ -26,11 +26,17 @@ def test_serialise() -> None: tzinfo=datetime.timezone.utc ), procedure_occurrence_id="234", + project_name="test project", + omop_es_timestamp=datetime.datetime.strptime( + "Dec 7 2023 2:08PM", "%b %d %Y %I:%M%p" + ).replace(tzinfo=datetime.timezone.utc), ) assert ( msg_body.decode() == '{"mrn": "111", "accession_number": "123", ' '"study_datetime": "2022-11-22T13:33:00+00:00", ' - '"procedure_occurrence_id": "234"}' + '"procedure_occurrence_id": "234", ' + '"project_name": "test project", ' + '"omop_es_timestamp": "2023-12-07T14:08:00+00:00"}' ) @@ -43,6 +49,13 @@ def test_deserialise_datetime() -> None: """Checks that datetimes can be correctly serialised""" timestamp = 
datetime.datetime.fromordinal(100012) data = deserialise( - serialise(mrn="", accession_number="", study_datetime=timestamp, procedure_occurrence_id="") + serialise( + mrn="", + accession_number="", + study_datetime=timestamp, + procedure_occurrence_id="", + project_name="", + omop_es_timestamp=datetime.datetime.now(), # noqa: DTZ005 + ) ) assert data["study_datetime"] == timestamp diff --git a/pixl_ehr/tests/test_processing.py b/pixl_ehr/tests/test_processing.py index f6ce8f3bb..4dd6f7b1c 100644 --- a/pixl_ehr/tests/test_processing.py +++ b/pixl_ehr/tests/test_processing.py @@ -37,6 +37,8 @@ "1234-01-01" ) # within hours of imaging study procedure_occurrence_id = "123" +project_name = "test project" +omop_es_timestamp = datetime.datetime.fromisoformat("1234-01-01 00:00:00") date_of_birth = "09/08/0007" sex = "testsexvalue" ethnicity = "testethnicity" @@ -60,6 +62,8 @@ tzinfo=datetime.timezone.utc ), procedure_occurrence_id=procedure_occurrence_id, + project_name=project_name, + omop_es_timestamp=omop_es_timestamp, ) diff --git a/pixl_pacs/tests/test_processing.py b/pixl_pacs/tests/test_processing.py index e349fc68e..250d976d9 100644 --- a/pixl_pacs/tests/test_processing.py +++ b/pixl_pacs/tests/test_processing.py @@ -37,6 +37,8 @@ tzinfo=datetime.timezone.utc ), procedure_occurrence_id="234", + project_name="test project", + omop_es_timestamp=datetime.datetime.fromisoformat("1234-01-01 00:00:00"), ) From bf39b464db46cf0d5c62b20f2a8b6e5fa71baaa4 Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Tue, 19 Dec 2023 11:02:49 +0000 Subject: [PATCH 17/28] Add `project_name` and `omop_es_timestamp` fields to `ImagingStudy` class --- pixl_pacs/src/pixl_pacs/_processing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pixl_pacs/src/pixl_pacs/_processing.py b/pixl_pacs/src/pixl_pacs/_processing.py index ebac6d576..9338d89ee 100644 --- a/pixl_pacs/src/pixl_pacs/_processing.py +++ b/pixl_pacs/src/pixl_pacs/_processing.py @@ -68,6 +68,8 @@ class ImagingStudy: 
accession_number: str study_datetime: datetime procedure_occurrence_id: str + project_name: str + omop_es_timestamp: datetime @classmethod def from_message(cls, message_body: bytes) -> "ImagingStudy": From 90b667234c265df336caca64d0c0cbd00411a6a7 Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Wed, 20 Dec 2023 17:36:03 +0000 Subject: [PATCH 18/28] Refactor message serialisation and deserialisation (#197) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Refactor message serialization and deserialization Adding `Message` and `SerialisedMessage` classes in an attempt to improve information hiding and decoupling. * Rename `utils.py` -> `message.py` * Add `decode()` method for `SerialisedMessage` * Update docstring * Use new classes in message testing * Refactor message processing in the CLI * Refactor `process_message` to use `SerialisedMessage` class in EHR API * Refactor `process_message` to use `SerialisedMessage` class in imaging API * Fix `ImagingStudy` initialisation in `ImagingStudy.from_message()` * Fix imports * Fix test: access serialised message bodies * Turn `Message` into a `dataclass` * Fix failing tests * Use `jsonpickle` for (de)serializing messages This also removes the need for the `SerialisedMessage` class * Fix `test_deserialise_datetime()` so it uses the `Message` class to assert the `study_datetime` * Add `study_datetime` property for `Message` * No need to test deserialising individual fields, already covered by `test_deserialise()` which deserialises the entire object * Remove `study_date_from_serialised()`, use the class attribute `study_datetime` instead * Revert "Add `study_datetime` property for `Message`" This reverts commit 87191539881e9a2a3ca2bab99c771ef31d2983e3.
* Remove `Messages` class, use `list[Message]` instead * Add type checking for messages parsed from parquet input * Update `test_messages_from_parquet()` to use JSON strings instead of bytes * Update `PixlProducer.publish()` to use a list of Message objects and handle serialisation * Convert JSON string to bytes when serialising * Revert "Update `test_messages_from_parquet()` to use JSON strings instead of bytes" This reverts commit 0e4fce42dbef912f45e9f1b0105bb44cb771c95e. * `PixlProducer.publish()` should take a `list[Message]` as input in tests * Update EHR API to use new `Message` design * Update imaging API to use new `Message` design * Update deserialise function to accept bytes-encoded JSON string * Assert messages against list of `Message`s * Print dataclass in logs * `jsonpickle.decode()` can handle bytes so no need to decode first Also add a note about why we ignore ruff rule S301 * Make `deserialisable` a keyword only argument * Copilot forgot to convert dates to datetimes 🥲 * Refactor PixlConsumer run method to accept Message object as callback parameter and deserialise * Update consumer in `test_subscriber` to accept Message object instead of bytes --- cli/src/pixl_cli/main.py | 82 +++++++------------ cli/tests/test_messages_from_parquet.py | 48 ++++++++--- pixl_core/pyproject.toml | 3 +- pixl_core/src/core/patient_queue/message.py | 74 +++++++++++++++++ pixl_core/src/core/patient_queue/producer.py | 12 ++- .../src/core/patient_queue/subscriber.py | 5 +- pixl_core/src/core/patient_queue/utils.py | 70 ---------------- pixl_core/tests/patient_queue/test_message.py | 47 +++++++++++ .../tests/patient_queue/test_producer.py | 11 ++- .../tests/patient_queue/test_subscriber.py | 19 +++-- pixl_core/tests/patient_queue/test_utils.py | 61 -------------- pixl_ehr/src/pixl_ehr/_processing.py | 19 ++--- pixl_ehr/tests/test_processing.py | 6 +- pixl_pacs/src/pixl_pacs/_processing.py | 27 +++--- pixl_pacs/tests/test_processing.py | 10 +-- 15 files changed, 253 
insertions(+), 241 deletions(-) create mode 100644 pixl_core/src/core/patient_queue/message.py delete mode 100644 pixl_core/src/core/patient_queue/utils.py create mode 100644 pixl_core/tests/patient_queue/test_message.py delete mode 100644 pixl_core/tests/patient_queue/test_utils.py diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index e9e3bbfc2..fdaf3d71c 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -16,6 +16,7 @@ import datetime import json import os +from operator import attrgetter from pathlib import Path from typing import Any, Optional @@ -23,9 +24,9 @@ import pandas as pd import requests import yaml +from core.patient_queue.message import Message, deserialise from core.patient_queue.producer import PixlProducer from core.patient_queue.subscriber import PixlBlockingConsumer -from core.patient_queue.utils import deserialise, serialise from ._logging import logger, set_log_level from ._utils import clear_file, remove_file_if_it_exists, string_is_non_empty @@ -84,12 +85,12 @@ def populate(queues: str, *, restart: bool, parquet_dir: Path) -> None: if state_filepath.exists() and restart: logger.info(f"Extracting messages from state: {state_filepath}") inform_user_that_queue_will_be_populated_from(state_filepath) - messages = Messages.from_state_file(state_filepath) + messages = messages_from_state_file(state_filepath) elif parquet_dir is not None: messages = messages_from_parquet(parquet_dir) remove_file_if_it_exists(state_filepath) # will be stale - producer.publish(sorted(messages, key=study_date_from_serialised)) + producer.publish(sorted(messages, key=attrgetter("study_datetime"))) @cli.command() @@ -273,41 +274,26 @@ def state_filepath_for_queue(queue_name: str) -> Path: return Path(f"{queue_name.replace('/', '_')}.state") -class Messages(list): +def messages_from_state_file(filepath: Path) -> list[Message]: """ - Class to represent messages + Return messages from a state file path - Methods - ------- - 
from_state_file(cls, filepath) - Return messages from a state file path + :param filepath: Path for state file to be read + :return: A list of Message objects containing all the messages from the state file """ + logger.info(f"Creating messages from {filepath}") + if not filepath.exists(): + raise FileNotFoundError + if filepath.suffix != ".state": + msg = f"Invalid file suffix for {filepath}. Expected .state" + raise ValueError(msg) - @classmethod - def from_state_file(cls, filepath: Path) -> "Messages": - """ - Return messages from a state file path - - :param filepath: Path for state file to be read - :return: A Messages object containing all the messages from the state file - """ - logger.info(f"Creating messages from {filepath}") - if not filepath.exists(): - raise FileNotFoundError - if filepath.suffix != ".state": - msg = f"Invalid file suffix for {filepath}. Expected .state" - raise ValueError(msg) - - return cls( - [ - line.encode("utf-8") - for line in Path.open(filepath).readlines() - if string_is_non_empty(line) - ] - ) + return [ + deserialise(line) for line in Path.open(filepath).readlines() if string_is_non_empty(line) + ] -def messages_from_parquet(dir_path: Path) -> Messages: +def messages_from_parquet(dir_path: Path) -> list[Message]: """ Reads patient information from parquet files within directory structure and transforms that into messages. 
@@ -345,9 +331,6 @@ def messages_from_parquet(dir_path: Path) -> Messages: f"{expected_col_names}" ) - # First line is column names - messages = Messages() - for col in expected_col_names: if col not in list(cohort_data.columns): msg = f"csv file expected to have at least {expected_col_names} as " f"column names" @@ -367,17 +350,19 @@ def messages_from_parquet(dir_path: Path) -> Messages: project_name = logs["settings"]["cdm_source_name"] omop_es_timestamp = datetime.datetime.fromisoformat(logs["datetime"]) + messages = [] + for _, row in cohort_data.iterrows(): - messages.append( - serialise( - mrn=row[mrn_col_name], - accession_number=row[acc_num_col_name], - study_datetime=row[dt_col_name], - procedure_occurrence_id=row[procedure_occurrence_id], - project_name=project_name, - omop_es_timestamp=omop_es_timestamp, - ) + # Create new dict to initialise message + message = Message( + mrn=row[mrn_col_name], + accession_number=row[acc_num_col_name], + study_datetime=row[dt_col_name], + procedure_occurrence_id=row[procedure_occurrence_id], + project_name=project_name, + omop_es_timestamp=omop_es_timestamp, ) + messages.append(message) if len(messages) == 0: msg = f"Failed to find any messages in {dir_path}" @@ -446,12 +431,3 @@ def api_config_for_queue(queue_name: str) -> APIConfig: raise ValueError(msg) return APIConfig(config[config_key]) - - -def study_date_from_serialised(message: bytes) -> datetime.datetime: - """Get the study date from a serialised message as a datetime""" - result = deserialise(message)["study_datetime"] - if not isinstance(result, datetime.datetime): - msg = "Expected study date to be a datetime. Got %s" - raise TypeError(msg, type(result)) - return result diff --git a/cli/tests/test_messages_from_parquet.py b/cli/tests/test_messages_from_parquet.py index ec081ac4f..c092cc489 100644 --- a/cli/tests/test_messages_from_parquet.py +++ b/cli/tests/test_messages_from_parquet.py @@ -13,8 +13,10 @@ # limitations under the License. 
"""Unit tests for reading cohorts from parquet files.""" +import datetime from pathlib import Path +from core.patient_queue.message import Message from pixl_cli.main import messages_from_parquet @@ -25,19 +27,41 @@ def test_messages_from_parquet(resources: Path) -> None: """ omop_parquet_dir = resources / "omop" messages = messages_from_parquet(omop_parquet_dir) + assert all(isinstance(msg, Message) for msg in messages) + expected_messages = [ - b'{"mrn": "12345678", "accession_number": "12345678", "study_datetime": "2021-07-01", ' - b'"procedure_occurrence_id": 1, "project_name": "Test Extract - UCLH OMOP CDM", ' - b'"omop_es_timestamp": "2023-12-07T14:08:58"}', - b'{"mrn": "12345678", "accession_number": "ABC1234567", "study_datetime": "2021-07-01", ' - b'"procedure_occurrence_id": 2, "project_name": "Test Extract - UCLH OMOP CDM", ' - b'"omop_es_timestamp": "2023-12-07T14:08:58"}', - b'{"mrn": "987654321", "accession_number": "ABC1234560", "study_datetime": "2020-05-01", ' - b'"procedure_occurrence_id": 3, "project_name": "Test Extract - UCLH OMOP CDM", ' - b'"omop_es_timestamp": "2023-12-07T14:08:58"}', - b'{"mrn": "5020765", "accession_number": "MIG0234560", "study_datetime": "2015-05-01", ' - b'"procedure_occurrence_id": 4, "project_name": "Test Extract - UCLH OMOP CDM", ' - b'"omop_es_timestamp": "2023-12-07T14:08:58"}', + Message( + mrn="12345678", + accession_number="12345678", + study_datetime=datetime.date.fromisoformat("2021-07-01"), + procedure_occurrence_id=1, + project_name="Test Extract - UCLH OMOP CDM", + omop_es_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"), + ), + Message( + mrn="12345678", + accession_number="ABC1234567", + study_datetime=datetime.date.fromisoformat("2021-07-01"), + procedure_occurrence_id=2, + project_name="Test Extract - UCLH OMOP CDM", + omop_es_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"), + ), + Message( + mrn="987654321", + accession_number="ABC1234560", + 
study_datetime=datetime.date.fromisoformat("2020-05-01"), + procedure_occurrence_id=3, + project_name="Test Extract - UCLH OMOP CDM", + omop_es_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"), + ), + Message( + mrn="5020765", + accession_number="MIG0234560", + study_datetime=datetime.date.fromisoformat("2015-05-01"), + procedure_occurrence_id=4, + project_name="Test Extract - UCLH OMOP CDM", + omop_es_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"), + ), ] assert messages == expected_messages diff --git a/pixl_core/pyproject.toml b/pixl_core/pyproject.toml index 8fdd72727..056307e6b 100644 --- a/pixl_core/pyproject.toml +++ b/pixl_core/pyproject.toml @@ -18,7 +18,8 @@ dependencies = [ "pika==1.3.1", "aio_pika==8.2.4", "environs==9.5.0", - "requests==2.31.0" + "requests==2.31.0", + "jsonpickle==3.0.2" ] [project.optional-dependencies] diff --git a/pixl_core/src/core/patient_queue/message.py b/pixl_core/src/core/patient_queue/message.py new file mode 100644 index 000000000..595be68c0 --- /dev/null +++ b/pixl_core/src/core/patient_queue/message.py @@ -0,0 +1,74 @@ +# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Classes to represent messages in the patient queue.""" + +import logging +from dataclasses import dataclass +from datetime import datetime +from typing import Any + +from jsonpickle import decode, encode + +logger = logging.getLogger(__name__) + + +@dataclass +class Message: + """Class to represent a message containing the relevant information for a study.""" + + mrn: str + accession_number: str + study_datetime: datetime + procedure_occurrence_id: str + project_name: str + omop_es_timestamp: datetime + + def serialise(self, *, deserialisable: bool = True) -> bytes: + """ + Serialise the message into a JSON string and convert to bytes. + + :param deserialisable: If True, the serialised message will be deserialisable, by setting + the unpicklable flag to False in jsonpickle.encode(), meaning that the original Message + object can be recovered by `deserialise()`. If False, calling `deserialise()` on the + serialised message will return a dictionary. + """ + msg = ( + "Serialising message with\n" + " * patient id: %s\n" + " * accession number: %s\n" + " * timestamp: %s\n" + " * procedure_occurrence_id: %s\n", + " * project_name: %s\n * omop_es_timestamp: %s", + self.mrn, + self.accession_number, + self.study_datetime, + self.procedure_occurrence_id, + self.project_name, + self.omop_es_timestamp, + ) + logger.debug(msg) + + return str.encode(encode(self, unpicklable=deserialisable)) + + +def deserialise(serialised_msg: bytes) -> Any: + """ + Deserialise a message from a bytes-encoded JSON string. + If the message was serialised with `deserialisable=True`, the original Message object will be + returned. Otherwise, a dictionary will be returned. + + :param serialised_msg: The serialised message. 
+ """ + return decode(serialised_msg) # noqa: S301, since we control the input, so no security risks diff --git a/pixl_core/src/core/patient_queue/producer.py b/pixl_core/src/core/patient_queue/producer.py index 38f5ec5d1..b840a4584 100644 --- a/pixl_core/src/core/patient_queue/producer.py +++ b/pixl_core/src/core/patient_queue/producer.py @@ -16,6 +16,8 @@ import logging from time import sleep +from core.patient_queue.message import Message + from ._base import PixlBlockingInterface LOGGER = logging.getLogger(__name__) @@ -24,7 +26,7 @@ class PixlProducer(PixlBlockingInterface): """Generic publisher for RabbitMQ""" - def publish(self, messages: list[bytes]) -> None: + def publish(self, messages: list[Message]) -> None: """ Sends a list of serialised messages to a queue. :param messages: list of messages to be sent to queue @@ -32,11 +34,15 @@ def publish(self, messages: list[bytes]) -> None: LOGGER.debug("Publishing %i messages to queue: %s", len(messages), self.queue_name) if len(messages) > 0: for msg in messages: + LOGGER.debug("Serialising message") + serialised_msg = msg.serialise() LOGGER.debug("Preparing to publish") - self._channel.basic_publish(exchange="", routing_key=self.queue_name, body=msg) + self._channel.basic_publish( + exchange="", routing_key=self.queue_name, body=serialised_msg + ) # RabbitMQ can miss-order messages if there is not a sufficient delay sleep(0.1) - LOGGER.debug("Message %s published to queue %s", msg.decode(), self.queue_name) + LOGGER.debug("Message %s published to queue %s", msg, self.queue_name) else: LOGGER.debug("List of messages is empty so nothing will be published to queue.") diff --git a/pixl_core/src/core/patient_queue/subscriber.py b/pixl_core/src/core/patient_queue/subscriber.py index cf5361297..bc868ee6b 100644 --- a/pixl_core/src/core/patient_queue/subscriber.py +++ b/pixl_core/src/core/patient_queue/subscriber.py @@ -21,6 +21,7 @@ import aio_pika +from core.patient_queue.message import Message, deserialise from 
core.token_buffer.tokens import TokenBucket from ._base import PixlBlockingInterface, PixlQueueInterface @@ -52,7 +53,7 @@ async def __aenter__(self) -> "PixlConsumer": self._queue = await self._channel.declare_queue(self.queue_name) return self - async def run(self, callback: Callable[[bytes], Awaitable[None]]) -> None: + async def run(self, callback: Callable[[Message], Awaitable[None]]) -> None: """ Creates loop that waits for messages from producer and processes them as they appear. @@ -73,7 +74,7 @@ async def run(self, callback: Callable[[bytes], Awaitable[None]]) -> None: try: await asyncio.sleep(0.01) # Avoid very fast callbacks - await callback(message.body) + await callback(deserialise(message.body)) except Exception: LOGGER.exception( "Failed to process %s" "Not re-queuing message", diff --git a/pixl_core/src/core/patient_queue/utils.py b/pixl_core/src/core/patient_queue/utils.py deleted file mode 100644 index 3619b667c..000000000 --- a/pixl_core/src/core/patient_queue/utils.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Utility functions""" - -import json -import logging -from datetime import datetime - -logger = logging.getLogger(__name__) - - -def deserialise(message_body: bytes) -> dict: - """Returns the de-serialised message in JSON format.""" - logger.debug("De-serialising: %s", message_body.decode()) - data = dict(json.loads(message_body.decode())) - if "study_datetime" in data: - data["study_datetime"] = datetime.fromisoformat(data["study_datetime"]) - return data - - -def serialise( - mrn: str, - accession_number: str, - study_datetime: datetime, - procedure_occurrence_id: str, - project_name: str, - omop_es_timestamp: datetime, -) -> bytes: - """ - Returns serialised message from the given parameters. - :param mrn: patient identifier - :param accession_number: accession number - :param study_datetime: date and time of the study - :param procedure_occurrence_id: the OMOP ID of the procedure - :returns: JSON formatted message - """ - logger.debug( - "Serialising message with patient id %s, " - "accession number: %s and timestamp %s " - "procedure_occurrence_id %s, ", - "project_name %s, omop_es_timestamp %s", - mrn, - accession_number, - study_datetime, - procedure_occurrence_id, - project_name, - omop_es_timestamp, - ) - return json.dumps( - { - "mrn": mrn, - "accession_number": accession_number, - "study_datetime": study_datetime.isoformat(), - "procedure_occurrence_id": procedure_occurrence_id, - "project_name": project_name, - "omop_es_timestamp": omop_es_timestamp.isoformat(), - } - ).encode("utf-8") diff --git a/pixl_core/tests/patient_queue/test_message.py b/pixl_core/tests/patient_queue/test_message.py new file mode 100644 index 000000000..8471cfdc4 --- /dev/null +++ b/pixl_core/tests/patient_queue/test_message.py @@ -0,0 +1,47 @@ +# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import datetime + +from core.patient_queue.message import Message, deserialise + +msg = Message( + mrn="111", + accession_number="123", + study_datetime=datetime.datetime.strptime("Nov 22 2022 1:33PM", "%b %d %Y %I:%M%p").replace( + tzinfo=datetime.timezone.utc + ), + procedure_occurrence_id="234", + project_name="test project", + omop_es_timestamp=datetime.datetime.strptime("Dec 7 2023 2:08PM", "%b %d %Y %I:%M%p").replace( + tzinfo=datetime.timezone.utc + ), +) + + +def test_serialise() -> None: + """Checks that messages can be correctly serialised""" + msg_body = msg.serialise(deserialisable=False) + assert ( + msg_body == b'{"mrn": "111", "accession_number": "123", ' + b'"study_datetime": "2022-11-22T13:33:00+00:00", ' + b'"procedure_occurrence_id": "234", ' + b'"project_name": "test project", ' + b'"omop_es_timestamp": "2023-12-07T14:08:00+00:00"}' + ) + + +def test_deserialise() -> None: + """Checks if deserialised messages are the same as the original""" + serialised_msg = msg.serialise() + assert deserialise(serialised_msg) == msg diff --git a/pixl_core/tests/patient_queue/test_producer.py b/pixl_core/tests/patient_queue/test_producer.py index 2aa73691e..1aca4cda4 100644 --- a/pixl_core/tests/patient_queue/test_producer.py +++ b/pixl_core/tests/patient_queue/test_producer.py @@ -12,9 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import pytest +from core.patient_queue.message import Message from core.patient_queue.producer import PixlProducer TEST_QUEUE = "test_publish" +TEST_MESSAGE = Message( + mrn="111", + accession_number="123", + study_datetime="2022-11-22T13:33:00+00:00", + procedure_occurrence_id="234", + project_name="test project", + omop_es_timestamp="2023-12-07T14:08:00+00:00", +) @pytest.mark.pika() @@ -32,7 +41,7 @@ def test_publish() -> None: """ with PixlProducer(queue_name=TEST_QUEUE) as pp: pp.clear_queue() - pp.publish(messages=[b"test"]) + pp.publish(messages=[TEST_MESSAGE]) with PixlProducer(queue_name=TEST_QUEUE) as pp: assert pp.message_count == 1 diff --git a/pixl_core/tests/patient_queue/test_subscriber.py b/pixl_core/tests/patient_queue/test_subscriber.py index e2a8c6b1d..32910dedb 100644 --- a/pixl_core/tests/patient_queue/test_subscriber.py +++ b/pixl_core/tests/patient_queue/test_subscriber.py @@ -18,12 +18,21 @@ from unittest import TestCase import pytest +from core.patient_queue.message import Message from core.patient_queue.producer import PixlProducer from core.patient_queue.subscriber import PixlBlockingConsumer, PixlConsumer from core.token_buffer.tokens import TokenBucket TEST_QUEUE = "test_consume" -MESSAGE_BODY = b"test" +TEST_MESSAGE = Message( + mrn="111", + accession_number="123", + study_datetime="2022-11-22T13:33:00+00:00", + procedure_occurrence_id="234", + project_name="test project", + omop_es_timestamp="2023-12-07T14:08:00+00:00", +) + counter = 0 @@ -52,17 +61,17 @@ async def test_create(self) -> None: """Checks consume is working.""" global counter # noqa: PLW0602 with PixlProducer(queue_name=TEST_QUEUE) as pp: - pp.publish(messages=[MESSAGE_BODY]) + pp.publish(messages=[TEST_MESSAGE]) async with PixlConsumer(queue_name=TEST_QUEUE, token_bucket=TokenBucket()) as pc: - async def consume(msg: bytes) -> None: + async def consume(msg: Message) -> None: """ Increases counter when message is downloaded. 
:param msg: body of the message, though not needed :returns: the increased counter, though here only once """ - if str(msg) != "": + if str(msg.serialise()) != "": global counter counter += 1 @@ -78,7 +87,7 @@ def test_consume_all() -> None: graceful shutdown. """ with PixlProducer(queue_name=TEST_QUEUE) as pp: - pp.publish(messages=[MESSAGE_BODY, MESSAGE_BODY]) + pp.publish(messages=[TEST_MESSAGE, TEST_MESSAGE]) with PixlBlockingConsumer(queue_name=TEST_QUEUE) as bc: counter_bc = bc.consume_all(timeout_in_seconds=2, file_path=Path("test_producer.csv")) diff --git a/pixl_core/tests/patient_queue/test_utils.py b/pixl_core/tests/patient_queue/test_utils.py deleted file mode 100644 index c9b66d0db..000000000 --- a/pixl_core/tests/patient_queue/test_utils.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import datetime -import json - -from core.patient_queue.utils import deserialise, serialise - - -def test_serialise() -> None: - """Checks that messages can be correctly serialised""" - msg_body = serialise( - mrn="111", - accession_number="123", - study_datetime=datetime.datetime.strptime("Nov 22 2022 1:33PM", "%b %d %Y %I:%M%p").replace( - tzinfo=datetime.timezone.utc - ), - procedure_occurrence_id="234", - project_name="test project", - omop_es_timestamp=datetime.datetime.strptime( - "Dec 7 2023 2:08PM", "%b %d %Y %I:%M%p" - ).replace(tzinfo=datetime.timezone.utc), - ) - assert ( - msg_body.decode() == '{"mrn": "111", "accession_number": "123", ' - '"study_datetime": "2022-11-22T13:33:00+00:00", ' - '"procedure_occurrence_id": "234", ' - '"project_name": "test project", ' - '"omop_es_timestamp": "2023-12-07T14:08:00+00:00"}' - ) - - -def test_simple_deserialise() -> None: - """Checks a simple JSON deserialise works""" - assert deserialise((json.dumps({"key": "value"})).encode("utf-8"))["key"] == "value" - - -def test_deserialise_datetime() -> None: - """Checks that datetimes can be correctly serialised""" - timestamp = datetime.datetime.fromordinal(100012) - data = deserialise( - serialise( - mrn="", - accession_number="", - study_datetime=timestamp, - procedure_occurrence_id="", - project_name="", - omop_es_timestamp=datetime.datetime.now(), # noqa: DTZ005 - ) - ) - assert data["study_datetime"] == timestamp diff --git a/pixl_ehr/src/pixl_ehr/_processing.py b/pixl_ehr/src/pixl_ehr/_processing.py index b7d47bb75..89a9380f7 100644 --- a/pixl_ehr/src/pixl_ehr/_processing.py +++ b/pixl_ehr/src/pixl_ehr/_processing.py @@ -21,7 +21,7 @@ from typing import Optional import requests -from core.patient_queue.utils import deserialise +from core.patient_queue.message import Message from decouple import config from pixl_ehr._databases import EMAPStar, PIXLDatabase @@ -35,14 +35,14 @@ _this_dir = Path(Path(__file__).parent) -async def process_message(message_body: bytes) -> 
None: - logger.info("Processing: %s", message_body.decode()) +async def process_message(message: Message) -> None: + logger.info("Processing: %s", message) - raw_data = PatientEHRData.from_message(message_body) + raw_data = PatientEHRData.from_message(message) pixl_db = PIXLDatabase() if pixl_db.contains(raw_data): - logger.info("Messaged has already been processed") + logger.info("Message has already been processed") return emap_star_db = EMAPStar() @@ -79,16 +79,15 @@ class PatientEHRData: report_text: Optional[str] = None @classmethod - def from_message(cls, message_body: bytes) -> "PatientEHRData": + def from_message(cls, message: Message) -> "PatientEHRData": """ Create a minimal set of patient EHR data required to start queries from a queue message """ - message_data = deserialise(message_body) self = PatientEHRData( - mrn=message_data["mrn"], - accession_number=message_data["accession_number"], - acquisition_datetime=message_data["study_datetime"], + mrn=message.mrn, + accession_number=message.accession_number, + acquisition_datetime=message.study_datetime, ) logger.debug("Created %s from message data", self) diff --git a/pixl_ehr/tests/test_processing.py b/pixl_ehr/tests/test_processing.py index 4dd6f7b1c..d0b933884 100644 --- a/pixl_ehr/tests/test_processing.py +++ b/pixl_ehr/tests/test_processing.py @@ -21,7 +21,7 @@ import datetime import pytest -from core.patient_queue.utils import serialise +from core.patient_queue.message import Message from decouple import config from pixl_ehr._databases import PIXLDatabase, WriteableDatabase from pixl_ehr._processing import process_message @@ -55,7 +55,7 @@ weight_vot_id, height_vot_id, gcs_vot_id = 2222222, 3333333, 4444444 ls_id, lo_id, lr_id, ltd_id = 5555555, 6666666, 7777777, 8888888 -message_body = serialise( +message = Message( mrn=mrn, accession_number=accession_number, study_datetime=datetime.datetime.strptime(study_datetime_str, "%d/%m/%Y %H:%M").replace( @@ -163,7 +163,7 @@ def 
insert_data_into_emap_star_schema() -> None: @pytest.mark.asyncio() async def test_message_processing() -> None: insert_data_into_emap_star_schema() - await process_message(message_body) + await process_message(message) pixl_db = QueryablePIXLDB() row = pixl_db.execute_query_string("select * from emap_data.ehr_raw where mrn = %s", [mrn]) diff --git a/pixl_pacs/src/pixl_pacs/_processing.py b/pixl_pacs/src/pixl_pacs/_processing.py index 9338d89ee..af0733444 100644 --- a/pixl_pacs/src/pixl_pacs/_processing.py +++ b/pixl_pacs/src/pixl_pacs/_processing.py @@ -15,10 +15,9 @@ import os from asyncio import sleep from dataclasses import dataclass -from datetime import datetime from time import time -from core.patient_queue.utils import deserialise +from core.patient_queue.message import Message from decouple import config from pixl_pacs._orthanc import Orthanc, PIXLRawOrthanc @@ -27,10 +26,10 @@ logger.setLevel(os.environ.get("LOG_LEVEL", "WARNING")) -async def process_message(message_body: bytes) -> None: - logger.info("Processing: %s", message_body.decode()) +async def process_message(message: Message) -> None: + logger.info("Processing: %s", message) - study = ImagingStudy.from_message(message_body) + study = ImagingStudy.from_message(message) orthanc_raw = PIXLRawOrthanc() if study.exists_in(orthanc_raw): @@ -49,7 +48,7 @@ async def process_message(message_body: bytes) -> None: while job_state != "Success": if (time() - start_time) > config("PIXL_DICOM_TRANSFER_TIMEOUT", cast=float): msg = ( - f"Failed to transfer {message_body.decode()} within " + f"Failed to transfer {message.decode()} within " f"{config('PIXL_DICOM_TRANSFER_TIMEOUT')} seconds" ) raise TimeoutError(msg) @@ -64,22 +63,20 @@ async def process_message(message_body: bytes) -> None: class ImagingStudy: """Dataclass for EHR unique to a patient and xray study""" - mrn: str - accession_number: str - study_datetime: datetime - procedure_occurrence_id: str - project_name: str - omop_es_timestamp: datetime + 
message: Message @classmethod - def from_message(cls, message_body: bytes) -> "ImagingStudy": - return ImagingStudy(**deserialise(message_body)) + def from_message(cls, message: Message) -> "ImagingStudy": + return ImagingStudy(message=message) @property def orthanc_query_dict(self) -> dict: return { "Level": "Study", - "Query": {"PatientID": self.mrn, "AccessionNumber": self.accession_number}, + "Query": { + "PatientID": self.message.mrn, + "AccessionNumber": self.message.accession_number, + }, } def exists_in(self, node: Orthanc) -> bool: diff --git a/pixl_pacs/tests/test_processing.py b/pixl_pacs/tests/test_processing.py index 250d976d9..82084e44b 100644 --- a/pixl_pacs/tests/test_processing.py +++ b/pixl_pacs/tests/test_processing.py @@ -19,7 +19,7 @@ import os import pytest -from core.patient_queue.utils import serialise +from core.patient_queue.message import Message from decouple import config from pixl_pacs._orthanc import Orthanc, PIXLRawOrthanc from pixl_pacs._processing import ImagingStudy, process_message @@ -30,7 +30,7 @@ ACCESSION_NUMBER = "abc" PATIENT_ID = "a_patient" -message_body = serialise( +message = Message( mrn=PATIENT_ID, accession_number=ACCESSION_NUMBER, study_datetime=datetime.datetime.strptime("01/01/1234 01:23:45", "%d/%m/%Y %H:%M:%S").replace( @@ -73,15 +73,15 @@ def add_image_to_fake_vna(image_filename: str = "test.dcm") -> None: @pytest.mark.asyncio() async def test_image_processing() -> None: add_image_to_fake_vna() - study = ImagingStudy.from_message(message_body) + study = ImagingStudy.from_message(message) orthanc_raw = PIXLRawOrthanc() assert not study.exists_in(orthanc_raw) - await process_message(message_body=message_body) + await process_message(message) assert study.exists_in(orthanc_raw) # TODO: check time last updated after processing again # noqa: FIX002 # is not incremented # https://github.com/UCLH-Foundry/PIXL/issues/156 - await process_message(message_body=message_body) + await process_message(message) assert 
study.exists_in(orthanc_raw) From a243f6ffc72d7e03c8fbbf0272ce955eb35174f2 Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Wed, 20 Dec 2023 17:39:30 +0000 Subject: [PATCH 19/28] Format README --- cli/README.md | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/cli/README.md b/cli/README.md index 6745c73a5..b44ba8044 100644 --- a/cli/README.md +++ b/cli/README.md @@ -1,11 +1,10 @@ # PIXL Driver + Command line interface -The PIXL CLI driver provides functionality to populate a queue with messages -containing information required to run electronic health queries against the -EMAP star database and the PACS image system. Once a set of queues are -populated the consumers can be started, updated and the system extractions -stopped cleanly. - +The PIXL CLI driver provides functionality to populate a queue with messages +containing information required to run electronic health queries against the +EMAP star database and the PACS image system. Once a set of queues are +populated the consumers can be started, updated and the system extractions +stopped cleanly. ## Installation @@ -14,43 +13,50 @@ pip install -e ../pixl_core/ . ``` ## Test + ```bash ./tests/run-tests.sh ``` - ## Usage > **Note** > Services must be started prior to using the CLI See the commands and subcommands with + ```bash pixl --help ``` Populate queue for PACS and EHR extraction + ```bash pixl populate .csv ``` + where the csv file contains MRN, accession numbers and timestamps in the format: -| VAL_ID | ACCESSION_NUMBER | STUDY_INSTANCE_UID | STUDY_DATE | ... | +| VAL_ID | ACCESSION_NUMBER | STUDY_INSTANCE_UID | STUDY_DATE | ... | |--------|------------------|--------------------|------------------|-----| | X | Y | Z | 29/02/2010 05:12 | | - Start the PACS extraction + ```bash pixl start --queues pacs ``` + and equivalently the EHR extraction + ```bash pixl start --queues ehr ``` + Use `pixl start --help` for information. 
Stop PACS and EHR database extraction + ```bash pixl stop ``` From b4c7a514f58d58995e9cb3e8d324c1c40a1b0bba Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Wed, 20 Dec 2023 17:49:35 +0000 Subject: [PATCH 20/28] Update documentation with new parquet inputs --- cli/README.md | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cli/README.md b/cli/README.md index b44ba8044..be60c00d7 100644 --- a/cli/README.md +++ b/cli/README.md @@ -32,14 +32,22 @@ pixl --help Populate queue for PACS and EHR extraction ```bash -pixl populate .csv +pixl populate --parquet-dir ``` -where the csv file contains MRN, accession numbers and timestamps in the format: - -| VAL_ID | ACCESSION_NUMBER | STUDY_INSTANCE_UID | STUDY_DATE | ... | -|--------|------------------|--------------------|------------------|-----| -| X | Y | Z | 29/02/2010 05:12 | | +where `parquet_dir` contains at least the following files: + +```sh +resources +└── omop + ├── log + │ └── extract_summary.json + ├── private + │ ├── PERSON_LINKS.parquet + │ └── PROCEDURE_OCCURRENCE_LINKS.parquet + └── public + └── PROCEDURE_OCCURRENCE.parquet +``` Start the PACS extraction From 10dd3e60c1a9df7d56380151611b8e901331fb5c Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Wed, 20 Dec 2023 17:56:26 +0000 Subject: [PATCH 21/28] Update system test to use parquet files --- .../resources/omop/log/extract_summary.json | 94 ++++++++++++++++++ .../omop/private/PERSON_LINKS.parquet | Bin 0 -> 1953 bytes .../PROCEDURE_OCCURRENCE_LINKS.parquet | Bin 0 -> 1311 bytes .../omop/public/PROCEDURE_OCCURRENCE.parquet | Bin 0 -> 5230 bytes test/data/test.csv | 2 - test/run-system-test.sh | 2 +- 6 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 test/data/resources/omop/log/extract_summary.json create mode 100644 test/data/resources/omop/private/PERSON_LINKS.parquet create mode 100644 test/data/resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet create mode 100644 
test/data/resources/omop/public/PROCEDURE_OCCURRENCE.parquet delete mode 100644 test/data/test.csv diff --git a/test/data/resources/omop/log/extract_summary.json b/test/data/resources/omop/log/extract_summary.json new file mode 100644 index 000000000..ab66372fc --- /dev/null +++ b/test/data/resources/omop/log/extract_summary.json @@ -0,0 +1,94 @@ +{ + "gitsha":"56e0eba8d098523c99f3c899979096d2c5ed4c5f", + "filesummaries":[ +"CARE_SITE.parquet: 4084 bytes", +"CARE_SITE_BAD.parquet: 3305 bytes", +"CARE_SITE_LINKS.parquet: 1201 bytes", +"CDM_SOURCE.parquet: 5823 bytes", +"CDM_SOURCE_BAD.parquet: 7852 bytes", +"CONDITION_OCCURRENCE.parquet: 6770 bytes", +"CONDITION_OCCURRENCE_BAD.parquet: 5004 bytes", +"CONDITION_OCCURRENCE_LINKS.parquet: 612 bytes", +"DEVICE_EXPOSURE.parquet: 4524 bytes", +"DEVICE_EXPOSURE_BAD.parquet: 4524 bytes", +"DEVICE_EXPOSURE_LINKS.parquet: 682 bytes", +"DRUG_EXPOSURE.parquet: 5782 bytes", +"DRUG_EXPOSURE_BAD.parquet: 3907 bytes", +"DRUG_EXPOSURE_LINKS.parquet: 597 bytes", +"FACT_RELATIONSHIP.parquet: 2167 bytes", +"FACT_RELATIONSHIP_BAD.parquet: 1357 bytes", +"LOCATION.parquet: 1865 bytes", +"LOCATION_BAD.parquet: 1343 bytes", +"LOCATION_LINKS.parquet: 904 bytes", +"MEASUREMENT.parquet: 6742 bytes", +"MEASUREMENT_BAD.parquet: 3982 bytes", +"MEASUREMENT_LINKS.parquet: 2309 bytes", +"OBSERVATION.parquet: 5614 bytes", +"OBSERVATION_BAD.parquet: 3618 bytes", +"OBSERVATION_LINKS.parquet: 1263 bytes", +"OBSERVATION_PERIOD.parquet: 2183 bytes", +"OBSERVATION_PERIOD_BAD.parquet: 1488 bytes", +"OBSERVATION_PERIOD_LINKS.parquet: 606 bytes", +"PERSON.parquet: 5420 bytes", +"PERSON_BAD.parquet: 3614 bytes", +"PERSON_LINKS.parquet: 1953 bytes", +"PROCEDURE_OCCURRENCE.parquet: 5230 bytes", +"PROCEDURE_OCCURRENCE_BAD.parquet: 3665 bytes", +"PROCEDURE_OCCURRENCE_LINKS.parquet: 1311 bytes", +"SPECIMEN.parquet: 4873 bytes", +"SPECIMEN_BAD.parquet: 3326 bytes", +"SPECIMEN_LINKS.parquet: 928 bytes", +"VISIT_DETAIL.parquet: 3228 bytes", +"VISIT_DETAIL_BAD.parquet: 
3228 bytes", +"VISIT_DETAIL_LINKS.parquet: 435 bytes", +"VISIT_OCCURRENCE.parquet: 5259 bytes", +"VISIT_OCCURRENCE_BAD.parquet: 3429 bytes", +"VISIT_OCCURRENCE_LINKS.parquet: 1349 bytes" + ], + "datetime":"2023-12-07 14:08:58", + "user":"John Watts", + "settings":{ + "site":"UCLH", + "cdm_source_name":"Test Extract - UCLH OMOP CDM", + "cdm_source_abbreviation":"Test UCLH OMOP", + "project_logic":"mock_project_settings/project_logic.R", + "min_date": 20100101, + "max_date": 20241231, + "enabled_sources":"epic", + "output_format":"parquet", + "OMOP_version": 60, + "cohort":{ + "file":"settings/mock_project_settings/mock_cohort.csv", + "exclude_NDOO": true, + "exclude_confidential": true, + "min_age_at_encounter_start": 16, + "max_age_at_encounter_start": 80 + }, + "keep_source_vals": false, + "person":{ + "include_nhs_number": false, + "include_mrn": false, + "keep_day_of_birth": false, + "keep_month_of_birth": true, + "include_gp_as_primary_care_site": false + }, + "observation_period_strategy":"visit_span", + "local_timezone":"Europe/London", + "output_timezone":"GMT", + "condition_occurrence":{ + "include_sexual_health": false, + "allow_icd_as_std": true + }, + "measurements":{ + "include_file":null, + "include_measurement_concept_ids":null, + "non_generic_numeric_labs": 3040104 + }, + "location":{ + "keep_only_zip": true, + "replace_postcode_with_LSOA": true + }, + "mapping_effective_date": 19698, + "name":"mock_project_settings" + } +} \ No newline at end of file diff --git a/test/data/resources/omop/private/PERSON_LINKS.parquet b/test/data/resources/omop/private/PERSON_LINKS.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bef07a1a24948de388872a05c7ea6a1cd68ebf1c GIT binary patch literal 1953 zcmcIly;B-d5Px@^ClHcR$9HqgTtT5X7RK-)h?#LF?~o4_6;b>k&Ny&ffC&OQ{0J#i zr?AST%s8o1rm#w#Dy2pKloTnldx)HfnH2hlxA%Vg_U-=O?jDz6*#H%&OqDbhDH$Wg zGtCh~c$f~Dl+bZ1Ps82_Oi_^q)XDpbk0d%IrW9={FgiLH1HynM+zhG04{wzUxIEQ<^k^A;)z;WKpW)QL!h|9 
z^S#jHIhrXPRgL=5;;~sM){GUSZR309P=E2jxaX?R@j%8Yn(k(H;)4@{2ZEv5aAYp( zg7tiKE)t#%1p~tX!v3HiVE;`bWXg4ICp){x`>$~}tWeKYy4@MGdRQ>qt7hHe|C5t0 zIrWREejifqayO7Xq(2ROuq`mdPv}^$6Cxrghh?kcgs5jOdm3v6-B^FMX_zNf{W^_X zQeM4usuxZL&JK(>LdVlsb%^IwJmCLvC-4}RdVysDwX%Tw5xXFLrL#+IEbaU7ACUT(ksScfx00@lf13LJNR}Z5$XXu zTV*CmTEW}l@zG3w!a8~zYa)qW#k>Ciua)|cZ-9FLeR&V9_Asp>{TY%()hV5e z*~7Bg^_}OPPLY2*p5X{FjWM0uD_c4Z3|4U!V{8eZfV)pXIaFC5 zTKG(zqKu+I$1Ju5K@i4cam1RnZ!Hlr)_|#L`DC`8-(7pR8QczoB8Sv)mu;fZ_O7tT zDu~%@6qbKnFqTD;OTMk_1-CQ#+FDJo=bNxj=XcRm+qmg3*7dJx)=9W5TA}C+$OHIG zEjNohiMEjow2NQ?`@kDhwHW_DJ_&zeXZyGm&j$3vM75ag@`*)n@&TvX2Ie=(n9de4 zcqbUHAmox&pXCm)B#IbnQyScL?W9MvD&En0-+iIjNiKt{{{9RsR)E*;N$!kvKUu2 literal 0 HcmV?d00001 diff --git a/test/data/resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet b/test/data/resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet new file mode 100644 index 0000000000000000000000000000000000000000..97a953bd41d609f74696b67c4456a83b77986283 GIT binary patch literal 1311 zcmcIkOLNjt6ut?F8K$<_>5a)u7C6&px@ap8!Q#}#4ML%%2#BF((}8e>A}>P`+n?f3 zaOtwY!L=*Ll`F@E&o8YKF;gA-}w?!VwEWM(=c5wP=%63LPA>-Aw-7h zgXxD!30@SCdq2>JgO-(3@Ey&U%;SqXZkHL*X5(p;hz+Krsu zeHyb9m^zx(vyA8jK%Ze>&|ePwgdr}&0|2GPrH`kESaGpjTqbTW)p2T-sN1t&G-Ab0 zbARX9sUbyq@dcT+AqE>e#>R2f zLl0FHMO775QI$j01B!Zxq8_RqqNv9z%B?;1+Dnu}#3_fWs-mjz&3Zk%W=Yxxvcj`F zf8YDw_ukuCG0&r;)Jc8xScJML=_91~BL^X*1%9pYbHa}jIzW|SnD3?SwB4&Xaa3{Q zIyZUG)PsSJW4P7jXlZ>+*aL@`ws=mF_Fj*2q*y9wnRKO;St@8+rBupnXqlyKnzp!I z?z;~64-PiNTrUFm3-_-*vx!4YxGC5mOTsWn}mJml&lXG zGo{VKhQV{&0aV$Kt?YRVFqIG}gY1!)Gu>7u$nQI6$(451MqeCNoH&obvS(p>hp@le zCDQ?^uT}tQi)Ab6>+S5|oF-ITx)U^HzAa&>+6WXRRw>HfP5UI*(=Hf%LlK+^VcUI@kx9$UMtZ4{A{X3A z_T(^obl3}7Kz^d^7KMOFEDVV3y|O+(D_+`BPj1)cN@}B=E!RY)q9WH7_Vmb} ziu{MTfAyBh<*n>ywrq1wpvB)j?7n9|EdDX*rCoK$Oo87lR7!9%ZKZM*y>ij$gF*Jq z;GRbJFYw>KEmatl$yn!PTan)%W!H`#WCA?&Nu6jT8Es{{;bS}R>?M=udCw`Lv~Oyc zGWitsLT(JT?D<)s&&uOh@7o@7`c%_VGSn5xK@m_V*f70D@D+j;Jv0#c;;?rZ%S7M4 z_sdV8H(?lW2D1rM{utA?X6%ZY&<-HkC@tMyzfB1c+64}UdFR%A2;H;D^`8oL9NJ%BMJA 
z<-t`krmCEu!>1tUVZ1}%xI^!K94Wvy7=&eWRah3(U^~AcmMuU(6h;dxd=eh3upK6# zUxNo8Mk6>>Z_kI$`2A;~H#r5X0ZNb%kJ#nvMQqB;foL(A&*ik-_|@gHSZy&7TS@1q zHq&vO4^Ew}ijyi})8XB~w136^kcQrBb2l+k*duWeXd=(Xo>wCr3Ox@U{6b|H$)BLgwK=jsF0!oDvZL literal 0 HcmV?d00001 diff --git a/test/data/test.csv b/test/data/test.csv deleted file mode 100644 index f6f9fbd2d..000000000 --- a/test/data/test.csv +++ /dev/null @@ -1,2 +0,0 @@ -VAL_ID,ACCESSION_NUMBER,STUDY_INSTANCE_UID,STUDY_DATE -patient_identifier,123456789,c,01/01/2022 00:01 diff --git a/test/run-system-test.sh b/test/run-system-test.sh index dfa8b2a7d..392a2c534 100755 --- a/test/run-system-test.sh +++ b/test/run-system-test.sh @@ -26,7 +26,7 @@ cd .. && \ ./scripts/insert_test_data.sh pip install "${PACKAGE_DIR}/pixl_core" "${PACKAGE_DIR}/cli" -pixl populate data/test.csv +pixl populate --parquet-dir "${PACKAGE_DIR}/test/data/resources/omop" pixl start sleep 65 # need to wait until the DICOM image is "stable" = 60s ./scripts/check_entry_in_pixl_anon.sh From 1192210e2b8f08aa79484569f30a2bc8f31a0c1e Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Thu, 21 Dec 2023 10:38:34 +0100 Subject: [PATCH 22/28] Move `extract_summary.json` one level up and update docs --- cli/README.md | 16 +++++++--------- cli/src/pixl_cli/main.py | 9 ++++++--- .../omop/{log => }/extract_summary.json | 0 .../omop/{log => }/extract_summary.json | 0 4 files changed, 13 insertions(+), 12 deletions(-) rename cli/tests/resources/omop/{log => }/extract_summary.json (100%) rename test/data/resources/omop/{log => }/extract_summary.json (100%) diff --git a/cli/README.md b/cli/README.md index be60c00d7..2c68c0965 100644 --- a/cli/README.md +++ b/cli/README.md @@ -38,15 +38,13 @@ pixl populate --parquet-dir where `parquet_dir` contains at least the following files: ```sh -resources -└── omop - ├── log - │ └── extract_summary.json - ├── private - │ ├── PERSON_LINKS.parquet - │ └── PROCEDURE_OCCURRENCE_LINKS.parquet - └── public - └── PROCEDURE_OCCURRENCE.parquet +parquet_dir +├── 
extract_summary.json +├── private +│ ├── PERSON_LINKS.parquet +│ └── PROCEDURE_OCCURRENCE_LINKS.parquet +└── public + └── PROCEDURE_OCCURRENCE.parquet ``` Start the PACS extraction diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index fdaf3d71c..e043d58e5 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -302,13 +302,17 @@ def messages_from_parquet(dir_path: Path) -> list[Message]: """ public_dir = dir_path / "public" private_dir = dir_path / "private" - log_dir = dir_path / "log" + log_file = dir_path / "extract_summary.json" - for d in [public_dir, private_dir, log_dir]: + for d in [public_dir, private_dir]: if not d.is_dir(): err_str = f"{d} must exist and be a directory" raise ValueError(err_str) + if not log_file.is_file(): + err_str = f"{log_file} must exist and be a file" + raise ValueError(err_str) + # MRN in people.PrimaryMrn: people = pd.read_parquet(private_dir / "PERSON_LINKS.parquet") # accession number in accessions.AccesionNumber @@ -345,7 +349,6 @@ def messages_from_parquet(dir_path: Path) -> list[Message]: ) = expected_col_names # Get project name and OMOP ES timestamp from log file - log_file = log_dir / "extract_summary.json" logs = json.load(log_file.open()) project_name = logs["settings"]["cdm_source_name"] omop_es_timestamp = datetime.datetime.fromisoformat(logs["datetime"]) diff --git a/cli/tests/resources/omop/log/extract_summary.json b/cli/tests/resources/omop/extract_summary.json similarity index 100% rename from cli/tests/resources/omop/log/extract_summary.json rename to cli/tests/resources/omop/extract_summary.json diff --git a/test/data/resources/omop/log/extract_summary.json b/test/data/resources/omop/extract_summary.json similarity index 100% rename from test/data/resources/omop/log/extract_summary.json rename to test/data/resources/omop/extract_summary.json From 6b4f38cd30a2d823c9e91ff0f320c5dbbc32701d Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Thu, 21 Dec 2023 11:24:29 +0100 Subject: 
[PATCH 23/28] Change `parquet-dir` to a click argument instead of option --- cli/README.md | 2 +- cli/src/pixl_cli/main.py | 6 +++--- cli/tests/test_queue_start_and_stop_parquet.py | 14 ++++++-------- test/run-system-test.sh | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/cli/README.md b/cli/README.md index 2c68c0965..9351d4e32 100644 --- a/cli/README.md +++ b/cli/README.md @@ -32,7 +32,7 @@ pixl --help Populate queue for PACS and EHR extraction ```bash -pixl populate --parquet-dir +pixl populate ``` where `parquet_dir` contains at least the following files: diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index e043d58e5..2f5d9edfd 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -70,13 +70,13 @@ def cli(*, debug: bool) -> None: default=True, help="Restart from a saved state. Otherwise will use the given input file(s)", ) -@click.option( - "--parquet-dir", +@click.argument( + "parquet-dir", required=True, type=click.Path(path_type=Path, exists=True, file_okay=False), help="Give a directory containing parquet input files", ) -def populate(queues: str, *, restart: bool, parquet_dir: Path) -> None: +def populate(parquet_dir: Path, *, restart: bool, queues: str) -> None: """Populate a (set of) queue(s) from a parquet file directory""" logger.info(f"Populating queue(s) {queues} from {parquet_dir}") for queue in queues.split(","): diff --git a/cli/tests/test_queue_start_and_stop_parquet.py b/cli/tests/test_queue_start_and_stop_parquet.py index 4e0c5780a..4affebb57 100644 --- a/cli/tests/test_queue_start_and_stop_parquet.py +++ b/cli/tests/test_queue_start_and_stop_parquet.py @@ -19,24 +19,22 @@ from pixl_cli.main import populate, queue_is_up, stop -def test_populate_queue_parquet(resources, queue_name: str = "test_populate") -> None: +def test_populate_queue_parquet(resources: Path, queue_name: str = "test_populate") -> None: """Checks that patient queue can be populated without error.""" - omop_parquet_dir = 
resources / "omop" + omop_parquet_dir = str(resources / "omop") runner = CliRunner() - result = runner.invoke( - populate, args=["--queues", queue_name, "--parquet-dir", omop_parquet_dir] - ) + result = runner.invoke(populate, args=[omop_parquet_dir, "--queues", queue_name]) assert result.exit_code == 0 -def test_down_queue_parquet(resources, queue_name: str = "test_down") -> None: +def test_down_queue_parquet(resources: Path, queue_name: str = "test_down") -> None: """ Checks that after the queue has been sent a stop signal, the queue has been emptied. """ - omop_parquet_dir = resources / "omop" + omop_parquet_dir = str(resources / "omop") runner = CliRunner() - _ = runner.invoke(populate, args=["--queues", queue_name, "--parquet-dir", omop_parquet_dir]) + _ = runner.invoke(populate, args=[omop_parquet_dir, "--queues", queue_name]) _ = runner.invoke(stop, args=["--queues", queue_name]) state_path = Path(f"{queue_name}.state") diff --git a/test/run-system-test.sh b/test/run-system-test.sh index 392a2c534..dc70cd51d 100755 --- a/test/run-system-test.sh +++ b/test/run-system-test.sh @@ -26,7 +26,7 @@ cd .. 
&& \ ./scripts/insert_test_data.sh pip install "${PACKAGE_DIR}/pixl_core" "${PACKAGE_DIR}/cli" -pixl populate --parquet-dir "${PACKAGE_DIR}/test/data/resources/omop" +pixl populate "${PACKAGE_DIR}/test/data/resources/omop" pixl start sleep 65 # need to wait until the DICOM image is "stable" = 60s ./scripts/check_entry_in_pixl_anon.sh From 3a04feb11b2e1e79349018635fd7445aa478e978 Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Thu, 21 Dec 2023 11:39:47 +0100 Subject: [PATCH 24/28] `click.argument` doesn't have a `help` parameter Document `parquet-dir` in the docstrings instead --- cli/src/pixl_cli/main.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 2f5d9edfd..46c95f10b 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -71,13 +71,24 @@ def cli(*, debug: bool) -> None: help="Restart from a saved state. Otherwise will use the given input file(s)", ) @click.argument( - "parquet-dir", - required=True, - type=click.Path(path_type=Path, exists=True, file_okay=False), - help="Give a directory containing parquet input files", + "parquet-dir", required=True, type=click.Path(path_type=Path, exists=True, file_okay=False) ) def populate(parquet_dir: Path, *, restart: bool, queues: str) -> None: - """Populate a (set of) queue(s) from a parquet file directory""" + """ + Populate a (set of) queue(s) from a parquet file directory + + PARQUET-DIR: Directory containing the public and private parquet input files and an + extract_summary.json log file. 
+ It's expected that the directory structure will be: + + PARQUET-DIR + ├── private + │ ├── PERSON_LINKS.parquet + │ └── PROCEDURE_OCCURRENCE_LINKS.parquet + ├── public + │ └── PROCEDURE_OCCURRENCE.parquet + └── extract_summary.json + """ logger.info(f"Populating queue(s) {queues} from {parquet_dir}") for queue in queues.split(","): with PixlProducer(queue_name=queue, **config["rabbitmq"]) as producer: From 1a2fab412c1387b4d6ec323cb32781567cf6db51 Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Thu, 21 Dec 2023 11:43:41 +0100 Subject: [PATCH 25/28] Slim down parquet test files to only those needed --- .../resources/omop/private/CARE_SITE_BAD.parquet | Bin 3305 -> 0 bytes .../omop/private/CARE_SITE_LINKS.parquet | Bin 1201 -> 0 bytes .../omop/private/CDM_SOURCE_BAD.parquet | Bin 7852 -> 0 bytes .../private/CONDITION_OCCURRENCE_BAD.parquet | Bin 5004 -> 0 bytes .../private/CONDITION_OCCURRENCE_LINKS.parquet | Bin 612 -> 0 bytes .../omop/private/DEVICE_EXPOSURE_BAD.parquet | Bin 4524 -> 0 bytes .../omop/private/DEVICE_EXPOSURE_LINKS.parquet | Bin 682 -> 0 bytes .../omop/private/DRUG_EXPOSURE_BAD.parquet | Bin 3907 -> 0 bytes .../omop/private/DRUG_EXPOSURE_LINKS.parquet | Bin 597 -> 0 bytes .../omop/private/FACT_RELATIONSHIP_BAD.parquet | Bin 1357 -> 0 bytes .../resources/omop/private/LOCATION_BAD.parquet | Bin 1343 -> 0 bytes .../omop/private/LOCATION_LINKS.parquet | Bin 904 -> 0 bytes .../omop/private/MEASUREMENT_BAD.parquet | Bin 3982 -> 0 bytes .../omop/private/MEASUREMENT_LINKS.parquet | Bin 2309 -> 0 bytes .../omop/private/OBSERVATION_BAD.parquet | Bin 3618 -> 0 bytes .../omop/private/OBSERVATION_LINKS.parquet | Bin 1263 -> 0 bytes .../omop/private/OBSERVATION_PERIOD_BAD.parquet | Bin 1488 -> 0 bytes .../private/OBSERVATION_PERIOD_LINKS.parquet | Bin 606 -> 0 bytes .../resources/omop/private/PERSON_BAD.parquet | Bin 3614 -> 0 bytes .../private/PROCEDURE_OCCURRENCE_BAD.parquet | Bin 3665 -> 0 bytes .../resources/omop/private/SPECIMEN_BAD.parquet | Bin 3326 -> 0 
bytes .../omop/private/SPECIMEN_LINKS.parquet | Bin 928 -> 0 bytes .../omop/private/VISIT_DETAIL_BAD.parquet | Bin 3228 -> 0 bytes .../omop/private/VISIT_DETAIL_LINKS.parquet | Bin 435 -> 0 bytes .../omop/private/VISIT_OCCURRENCE_BAD.parquet | Bin 3429 -> 0 bytes .../omop/private/VISIT_OCCURRENCE_LINKS.parquet | Bin 1349 -> 0 bytes .../resources/omop/public/CARE_SITE.parquet | Bin 4084 -> 0 bytes .../resources/omop/public/CDM_SOURCE.parquet | Bin 5823 -> 0 bytes .../omop/public/CONDITION_OCCURRENCE.parquet | Bin 6770 -> 0 bytes .../omop/public/DEVICE_EXPOSURE.parquet | Bin 4524 -> 0 bytes .../resources/omop/public/DRUG_EXPOSURE.parquet | Bin 5782 -> 0 bytes .../omop/public/FACT_RELATIONSHIP.parquet | Bin 2167 -> 0 bytes cli/tests/resources/omop/public/LOCATION.parquet | Bin 1865 -> 0 bytes .../resources/omop/public/MEASUREMENT.parquet | Bin 6742 -> 0 bytes .../resources/omop/public/OBSERVATION.parquet | Bin 5614 -> 0 bytes .../omop/public/OBSERVATION_PERIOD.parquet | Bin 2183 -> 0 bytes cli/tests/resources/omop/public/PERSON.parquet | Bin 5420 -> 0 bytes cli/tests/resources/omop/public/SPECIMEN.parquet | Bin 4873 -> 0 bytes .../resources/omop/public/VISIT_DETAIL.parquet | Bin 3228 -> 0 bytes .../omop/public/VISIT_OCCURRENCE.parquet | Bin 5259 -> 0 bytes 40 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 cli/tests/resources/omop/private/CARE_SITE_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/CARE_SITE_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/CDM_SOURCE_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/CONDITION_OCCURRENCE_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/CONDITION_OCCURRENCE_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/DEVICE_EXPOSURE_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/DEVICE_EXPOSURE_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/DRUG_EXPOSURE_BAD.parquet delete mode 100644 
cli/tests/resources/omop/private/DRUG_EXPOSURE_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/FACT_RELATIONSHIP_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/LOCATION_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/LOCATION_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/MEASUREMENT_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/MEASUREMENT_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/OBSERVATION_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/OBSERVATION_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/OBSERVATION_PERIOD_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/OBSERVATION_PERIOD_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/PERSON_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/PROCEDURE_OCCURRENCE_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/SPECIMEN_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/SPECIMEN_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/VISIT_DETAIL_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/VISIT_DETAIL_LINKS.parquet delete mode 100644 cli/tests/resources/omop/private/VISIT_OCCURRENCE_BAD.parquet delete mode 100644 cli/tests/resources/omop/private/VISIT_OCCURRENCE_LINKS.parquet delete mode 100644 cli/tests/resources/omop/public/CARE_SITE.parquet delete mode 100644 cli/tests/resources/omop/public/CDM_SOURCE.parquet delete mode 100644 cli/tests/resources/omop/public/CONDITION_OCCURRENCE.parquet delete mode 100644 cli/tests/resources/omop/public/DEVICE_EXPOSURE.parquet delete mode 100644 cli/tests/resources/omop/public/DRUG_EXPOSURE.parquet delete mode 100644 cli/tests/resources/omop/public/FACT_RELATIONSHIP.parquet delete mode 100644 cli/tests/resources/omop/public/LOCATION.parquet delete mode 100644 cli/tests/resources/omop/public/MEASUREMENT.parquet delete mode 100644 
cli/tests/resources/omop/public/OBSERVATION.parquet delete mode 100644 cli/tests/resources/omop/public/OBSERVATION_PERIOD.parquet delete mode 100644 cli/tests/resources/omop/public/PERSON.parquet delete mode 100644 cli/tests/resources/omop/public/SPECIMEN.parquet delete mode 100644 cli/tests/resources/omop/public/VISIT_DETAIL.parquet delete mode 100644 cli/tests/resources/omop/public/VISIT_OCCURRENCE.parquet diff --git a/cli/tests/resources/omop/private/CARE_SITE_BAD.parquet b/cli/tests/resources/omop/private/CARE_SITE_BAD.parquet deleted file mode 100644 index 1028dcef5fcb22e5e4c7f8f1c01c07e240b9d4dc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3305 zcmd5<-EQJW6n5BTRe4b_s)9getK0;sQnRs5OhWGl2M9~-4T%4n$TkFUZ0zMH7}U!? z$ZAzpeSlu{va0$FeV(2(#$bqoHchpv1>^bo&Ud~!XXXg5NrjK>N0>o>uNNypuS{E4Cj)!9Dd3kE zJIw#wUkQk=1&rb z?I^UFByVb0;F(AmjJ9HV#9SpXm&g|kk_Ucd-@Xy@?cUH58D-*$xcS8Mm3-!bNpP#cLTdU@e z;ho}HfwmmWb3&ShVOrqD6d>?cjafd;a;zYpQc#-q{E=tKdk|_Z@^tio1TF9z1PQ(; zo@3R?WHivG6Y#fmf5z&zIkko`v9w)F{Tv=YueV@aAjVz>c!6D4m_f&%7S7lQNu~0x@q<^&VjsdM_=qG&2r0uC zpAsRbB-lAx3QF~^Bvy^S)wO3NAvMU+I<(mM8%$1&vbAoK-c(CUs zOYCcn`a&{tf@A>3sByrtkjplVi@ZX>N431g1-`4Tl1yw@z9<5&<^tY*oO{i))WFNt zB}b3$!3C|TuheQ*INdS_oo2<31$GbG7l7?-Ve9 ztDL4JOS#uB>l1D0^KJWv+^u})=78_(R{d5NOs-weIBm?iQS@?R=q0`2_FF>zRukHQ zsn0u>X#fuLAsX#wmTL;;UBJJq5XWdW-zG6LPN=GJvT#z;UZK$^kva zZ9NMuRiC5(j+GyG8nqx+l!n=HtD%@pA!C6*Qy;b+#OgwZ*DaV&wfhhYb9rX|yO?@+ zyny$yig)uGFR^rv<32Vy3cwmcQrBlECwRTfHG0!!oLrR-LN0Ok^3(Q#mhEe(Kkfn= zbeGFhCtlH^UBr-V3!9BRY{vxj>R8I^$Vc@RslH1}K)bXj;ab7HZwYx$ZDgjHZ<4!M z`X^ahq>9FJ?CYWSYdugOxAl1E0%mD(L%ehjBqs>;f=I^D$94bFu=6SBkzDzDmimAM ziZe)Uk?f4n6o5E?lfFM2`2+qa4)#5V^S)4_Ceghk2&b!E@){Ors(lb@PsZ15OQ@>G>G4jF)0N&^56 zJj3->w5551CoCgI)w7%MYA|F3y?^gM#^ zP5-3XIXn5Ugyt#|0cQk}#tmtf2!GAz^Xr6UHDV-*Fw&L?f2>4=4{Bq_l?Q{S^e#rO z+>`GXFfW0aGz(QDL$oU?igxX0QT>AL~>8``=0du5TT{_ z?5}bAbKJs9Hx)9g1uzo6dn`{Kp1`V)*xv&WOxHUM>i(P$iWwt|5g-`NvdGf@;m)bXXYCj=UXVn>LfC=HdZhygF?R1#vB 
zcGSZiHFA}dzeY}#s#K@cs1(JqGP)$G(AB~^7};r@xTo#+Zf-i3NHC}=9`uw|<+q_; z-&K4SbvZ=EC_3&G{lm`wxRvjP%sc>gM2S%{a^7xwp@`ovX_pvoom zV=Jn(I+7a16<-~8E936w{-pn`J#ab!1I`2c#(cA29xA-4q!M6cCyw{#Tu#^A$z;v( hyt7&MLVEt_>~(fyb3M17Gq){(;DMj`qp9Ja)gSU93#$MC diff --git a/cli/tests/resources/omop/private/CDM_SOURCE_BAD.parquet b/cli/tests/resources/omop/private/CDM_SOURCE_BAD.parquet deleted file mode 100644 index 2478ac1ff00fe3b5c80757d96ec827b51e485dbe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7852 zcmdT}O>83f6(2^#>m}^Q>)m0oiYgAfqm>db%&=gSQwD4>;K4TK`(iP}7>xNU3Zu`zvn#=4-UQ29g(4! zb;k7p>|bM6m=#Zmdf~Zhzx&En5O&{7!=1L-Xo~%U_At(kAhoi#;eJo9wvPLqK~GkW z+m)8W&@S5b8SVOV`1XpQHLiqPnre zJu{wB@7s4w^`2VPt4LB$IX|tar=2!Y^Nr2*XB+#io&BvH6`kRFNy=Ph)|e-{GOHdt zAVHLKRIuZ`o7`vMzF9TlIuLHX)2u2zg76cY>(@5+ANSbT_Yk3#$y_w6j8jKrcYePQ z=AMZow;g9EcNZ`;M>DAY>F_YL79)?f_~jxLZH( z$Q5bOtn|h@Dgq_zDzo1`V!wXm3DMNs3jFkmMW7Z^Ri!WYPP;}*qQI{ov#%fDhQRL_ zQ{c~afgO3!Qrar10uU)DN>2gn8`{2R*xxVipm_)p^&FGsIu2>tpsy5Bsm5 zp&B~Vm3(Cls0Sl3q%Tr|7t`uBq;6dV-Xwx0VB z?+T89gAehcVA#Qhx3;&p9YIGf8{33ufOGKV?sss0KIDL5i06U<*nwXmeFqnE_#J%k z71_jb777m6jQw1Y6k@ITA?<`=5NNFXbiT+Ee?+RbiPwd5en&-Bd#BPs1qd3VEr;A` z4q9!P(BMpvPxI#kRvgw(jVVzE?q%ZvRj^%7z9p6mK{x4D=Ha#-myejshh@guw6_9& zhu$vdp9hx?tZQ56FQ{5=HQ^;fv3Lj3+ z%I+_o`taHk5`rKc!4I#LAiNSHH6bPlH6dCfbCJl@`Vc2~jrD^g*o$Mh2p6&rJck0% z!ILmS8Nv?U$kxah-}<#240luFurp3K5_~#)mJ+jPfrFjet7s#a%I7oj{cJ6j&WfXG zBcVM7*dvV7wGPhlVq=#BdM?@u)}=!JBHGyHqYc>8$oYZCg?Q8%!H*L!Ku%42!rTn~ zD1Fpzl;hlq%!fI-m2MV!iVjz89 zEhZb~e7Ig}CCBArxmzk`x)T5Dr;R@=Oz`vdb19x_%HUIRCjmCgq0eT0Wbtu=OADIK zC$V7qr)K&G;v@xj@XM{{X|)LaCvi6M-LTt`_z@=+g5VbmKN2@&d~M7e2wh&{({=D= zzf?>Hi-B@eZl~|u?v~ge>p3uqMTj9I7R6z-ZN$wP=mECK532E)zfuSfih*Ra+{%y3 zg z5c1#Xkt0#))~kD&aRv0N0&dG%0Qj9tdwErAV^4zK32i%ytvp_f!d=ETzJ0_@?;P?RhJJ~5n@Fwh#LjXAsaEAe(4|yEr$mAZb8%GVD|Fj;0xD)pIwgun0&j*_$ zkqjX^!#fDpFEYEpPOxXk?>q7N(uZ49@TppdscguZvv8Yl>aUm*KF;)8=rj3^m3 z^(9Orc+`n3iVv`71L)uHT%+GFkk_JE1k1@tA29Sw5D>Ha#ZbTaw0(;__!-VO5U(ed 
z0+QA6Tn3V#M_^(FzC&?mtC2vIk34e&wQL|LUb5jO#9nN)djQ}H| z7a}qqHDM|eoj9?N>lS}zalAO^akYf_;`r3Bwt;U!e3|-ppx2+3i=bwrflv}U9xc8) z$kT$Sh~sO{R>BbFj|4m{1LKkq84AP&U>Ds;i{kfi4u4WlkrZ$c(^774x6=FNKv6g4 mZg;cN>ve`+?X%6x1z!4J`ZwOdKWtz9M?HH=QHSvV)c*n10o-u_ diff --git a/cli/tests/resources/omop/private/CONDITION_OCCURRENCE_BAD.parquet b/cli/tests/resources/omop/private/CONDITION_OCCURRENCE_BAD.parquet deleted file mode 100644 index 29b36aa4189097666daf75cb726ad5f9d42a68a9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5004 zcmbtYJ98RW6kg*;?6rzmF$zqWj2rDtCbrpKge=ls7K9KFNrZQ!fdwJZ@(6EKyG)TH zRjN$JbqbG*)GkuF@T5+e$z<{?a?afccUKE`q#dz)ALo4cJI_TNlbqzeoxqE)!=W4&#Xph_d-f3S|C&OX0r#4U94X;|c$(#R=E#X~K1z zm~fM>CETb@2{-IZ!u1-FaQ)6BT(i{(*Y7RD4V%fJmxt6Q{Szj@6BDp zhj@8c-VH~1De_`(Z;uc2h5Y_Ac!nfirgxE-M0uBo>8>n=Lm<(sb(ExHzs+V*F-7`OJ1T8ci0J4K=UNkV1>^x#QB;7Ldf zZ>UP53mlDcJ04GEX8lFBla#ahi&Q3m5ju*sc9c#bRV?Nb>3l1d&1dFHCuuwh$dE<0 z)yFkC(}_!3~e9AOgl^=A=K*v;aSUVyq>lEcv*({aoOgucR^r z@M1|gqcBJ8)jXJUbiPt1Z(tywPI3dMONc zt-;GD!M4gGlP{p$rP9q#>)qU-kDM;>CMoIOK3DspiKoa{D2 zZrN=tPO2`vGc7ysYAJkCmUHKgL=k)=u{O?+P6l$VoE^Yf>-@fOVybdPQZYWE>UsU3 z$YL(^JYf80%3kpW=6rkxv4L8NLw#T^u(=V3SbIG{Q>lm&KKIT}jNrLshhh-hcT0WdXnMVs)`w5(9P+rNPQXX%5QxJn^!5~bc!5fSNS>f3p%G5 zoKpXR???mJ1+L((ub&ZDCQ z?j>wBxuV9R7Y1KKzC~5V?94u8il;iZZBXZqWjZz#X!JlKZ!E8th~XzhW;^%1qVyM z^i+m#88>b5lp~!=sJKwOMeN1+4l(@QBGH-UZzEq&n-I?TWKbJ^H))Qase{3@+HlyP iJ(ja}M)n-tNx4$!rIM2G@Ck7cV&DPzKP88US$MWo}l8D{+Gxwxvrp_a^N8 z$cWx>7|pM+E+C)bt+0evlI0jha`pasTXP(DP=4+x>K)`mJSus+)_FkCX^i1P{S- o(lC2k#mj1xrqwXZlC|?3XA9c1)7Y!kYjt7Uw5kvKG9CI~UyLPtZxl>JR8LtE$SPi>iyNscXme&YcGXwx=>4+vEE>-#zy`-#Hh% zNXjdbixDm|SAZ9Ho8vg1k45H|mbrzc#jv7UZCP!|{hq82j7D3D%tg6qEP9XU_{$&0 zY)i12hWPe^hw;9Hq3#d*LsM59LqpPyh9ViN5A2Hz0cbzW!A`bE_fAJM+Fd{5H**1q zf6ROSElwkLIBctmtv8S z&D$LZY1lV)2&g4#rk|`YF9pDSf7x@$CCh<1Q5Rna3FFNb4=Y@hjJDb{HcVY=YU)$< zz^8{-R{|V*d({I8WqWep>=~*)Y5)3ako`4}m+(L{TkT$Bp!Y4cCxOg;PQ1Pr0PA}qal%rc7%9Vkt5Bt5z8u8{S0M_MFu#N+ zV3v+Drl^Df%*RwmgT#J&%?2gU^Nsw+|JDe 
zudKOc!w(u*4~{{elI>h11pA$oz{^}AHto-`%yif}mTdO;ME=-29Zv~&AN0eob^rJ= z5xbK}mZwg8(-Q<|PmDEW_fL`q7v+{>OGn{Zt||U}_Y!~pi-2FWudc`O8MShRkP7^GPo_maA*4=di98{sZ;zKsgtFGogLr&Bi_L5a zURy0}ujAYbnr)E{XpZ2EJVx^Z$q+*Oq(#V6>^~d9(oBVbZGnJk5kWHR@FFkp#Xxa_ z5UXf`f6ypB!9@U#4{Nek7Uwp_ft=J5LNeW{r>X~Xw`$gt)fZB&M0D4lWMOwg9=Lr6 zZn@iN%lq?+8@wwhXcAmaS9jmL0aY*FwYTs>U3``HY0k{Q|l;FI%3s1BcXB}sMZ0~=DWi5z@VnjM}~;JR~O2=LTcMm zgsqND$izZuma1b0_#Y*ajdOE{ky;eNnhoxu5Mi$Ewwc<9R5>1+;#>F5DB9ZwO$ zT$A(rD`HYJg%mw#rbMmT%|Yy$>kumD2SB>NA(X|GeLj+9q0HfVS3Bv+c;Zfe{xKdj z*X+VsYikKn*eZwuqn9XSJhsWm?VqMsj9rfILL9Ybubc($cI(}J?Rm1ghMNvudq=Nv zx9{K;aFI{{dFgn6DnE{*!sfM)}Ig@^88se=Yclnrd- zXj!O&Zm3pB>|4!l*=nNebO%q%2LsU?G^AYO+`K6e0mV!R?2rY?|~XDMtC*!k;G8n<=R9^j=N9h|Bpv>aGakx=!9z*c`=k37{7uyE d_wKKx9wi^%#niF^Vijz`qQAnUzrx?> zETzWRiOFX6o1L9+zS;H?t%4kvL6#ciAPE45q05q%hJ>aj`}}$2@LN8gjQyF%Z%2Ju zQb5s_Edz|TZ9OcdqTd`Ju`iO1nmZ2sA?MSU?ap|ezkK6(i~S(cj`l!J_|A|&*m9gy zrKSHwgo(ezpn#@pnUy9mR+6mjEA$Y`(I!x5bsa71qJ0?ZSKp-_VfzClyAqD9ju;>t zU=4AAl|>2}Lp&473gU^_DUNoE1Z2(SIo-Swq!d;wRdHBZP%oeekqOAmJM*CA6o+oF zW3{{Od7;;wx?EgsMD@s`CM1Sn7g-SMAcjaU!=WHWUpUeg*|h*YvsJX)=WeIi+I7r> z*M8@euzOD_;tRw)$70TfE zD~3(2W9W1Izgc&{_~5oMp7tBPhFi!*je{Fh6V^T3-a!v z1JZAg{2n(e$P<>73DuXHhMjvqJ#s+#W5Yu6DxIEMHrkNd9Hd`1#*nr@ZSHfvm7ZKz zTaxD)D8{a9S|Hf;ZPT;yWFNEHxvjbDK5kM~0S}Hnx4U(G{n-Bv>;(~hC`tN||1Iazaym)rJl zf7)0;koNM6iY@;+>dG%7r~HEY$uBOL{9+2p zFD{ARnN4j<|f#uQmV z5Hko|h}8lofdIS+LJn6*2XA0Cx(fIrrlLbJ9}QJn&0sN-=_|#fGZ=?Ug{zPVFhjry zM9&Ta0fbK^F;O!q`ies$U@F3(63GX_b_mZwDV(n=tz=IroWfcb{nGu3lSm){d6MiQ zjUI|c*XaXpNvu~YH&6;nRSri(ax2p;hBN&NJmqw*DizS|b(F=!JdD% z4+|uOrDCS5B%}ROI1`fFM@_^mhMVUkF|s%D4u}7N2XMFfo>jv8XJSzZQY>6bkL*pn zS7#%+$roB2^v+44gJ;aqO#+r>z3 zUryz#rEt`MGq{9&yCS+E@m`=x@d){WPVAZZLX45(9A|C(psZ0^aF(rPP%4C~VqyQh z7|zv|6rO3kVcJ8!@vlvOykzyC$WK7(nem15dopkicm}o48)7ngUMl9AkhhL}Q;=_H zUNh^1?+GQ7C$WQxcxUrDeH0Qy(WaUf`ZDkk_z35mtBR>)5b-6bDa9x8+x+X`gOEME z+47fhEKcJMl~i&7bsdWvwn7uozK170A3DYGDF4#0^I38_$}d}^r%YSd>{|4fqBq8W zu~@&XM1ZTUd{vI*>T>%w56~CzQ$n3Fuh=PMM*AH^124K_%jX0q&3wi=dl&~xDj&3y 
z$*Z9`Hv4IcuSgZ)QY;<~f%|4p#)78*5x=16v#58Fl1xC*XmKsln*D!_^S1oyy4nBs z{8`_F^iZ{{fp?~#)SU78JI=S$m9z^()t}4V?sG}gI+r`}FQ?b&w0A<07r_@n_)!Eu Qq5l7ed%DhXC-C?AUq79ftpET3 diff --git a/cli/tests/resources/omop/private/DRUG_EXPOSURE_LINKS.parquet b/cli/tests/resources/omop/private/DRUG_EXPOSURE_LINKS.parquet deleted file mode 100644 index 183ce17cc8c031a9a8047c7cdc8c92e847381ac7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 597 zcmcIiO;5r=5Z#vAkm{i^Y|}M8aIxV~5D*S}n^y1(Ban#P2q{e^Ak-pa`~_b9FCIO3 z@P|3GXkudG&6mv1&YPK?x7lq&x571E;9iRxoNY6fYblH|6)l6Np>3dX#x1_%VU5Xk zt~<6ytgTV4#R@+nBsL4wZYY_|bCbCrio;b=XSykDbCfRQLG+l+7t1snj7MCx4C_^~ z-W0d(X8$@?c&L+cm86*HP?ILSITW4p#^acumV4ZCO`~V-oiB&3EOyzT8h0JeZA#=ZL z+1FC|F03H`N?s?&pZqPkg8VbJAU_G5fi>xSqp{ofW5tM>_>#ILB@cECF%SHSZX&^tV; zpu&{zVAhv${S!Q756%kt0>-&0+ajd8jnjFv@zL#+2DA34=2^#j*VJ-TQ+pqFiaEat z`C+Q;{##CpuuffeiXzMiLzKS4wK*)FGJbLI9I;UcR*1U-b9OezhZW|(jG;v-AXPjq zHC=PZ=vobb*;wkWnY-r!JNM=8WbPuXHq@n zbq6sfib9d!F-rg^K8|o{mU0ubF_<1}wOM;u8`$MV-k=v^|G*+|DqQDH;{1J*&iGdF qVd_jaZQtLtg1`$iGbb3suRoLB+1lRPmQNHy;ve!SNk|?3NdE#I(=s{$ diff --git a/cli/tests/resources/omop/private/LOCATION_BAD.parquet b/cli/tests/resources/omop/private/LOCATION_BAD.parquet deleted file mode 100644 index d53d5b28076a5135c7cc5d7ca3be0072238dd330..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1343 zcmb7E&2HL25MB&IjmoL2EnBiGE?BCfq}a4lS}BJ$KOuhk+EHeU;?!vR!~np z_7QsOu@BJa=%u$l0}s$c51m<4XW@Wgg~y&x-#6dxjB%5;bul9nQ8CGkJNJlZIN@d-F|=hE}o7&Prt%tcqXN^1sASUN+W7| zv_`2%%O1l_Ae`ecV zcj!m-6zlJfR`_@*p3nTchF^49rz#!f7jb_P5L+FFvZf@hgnl7*P#VHzhCQQk;~e?-J^$_aip?X!`9Pg`Xh@3p ygBp5BwTwN)ytAPv<|WYzgZC diff --git a/cli/tests/resources/omop/private/LOCATION_LINKS.parquet b/cli/tests/resources/omop/private/LOCATION_LINKS.parquet deleted file mode 100644 index f73025f888f445ecc68e19aea4855508290644fd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 904 
zcmcIj%TB^j5S?Bcf{F<--lk2uU}NI~6);3JZl;Jt@dYNnHl`^J5>c?As4L@FxOC~# zPw*T36u-ck>jQkmos-^qoVlGjbKARx)r{0-LXNMIfiZxXtpR|bB|(l!Bc;+&8MRHF zQB&u<0h0p}0Ygbn>Y5f2&Vy!4VdWs2u*{KW%lEpC*6CrRCWWCJcbajpIe91cS3Qfn zf)valL59eDBN-=bl<^-V88vmzGs%8Lz#z$&NT1{|)@otU>6C-30gks3=UEWTeu-}d ze`9Ga=D>`0{G;H+lRAwsA-^DjdhkMtd#nDHdW0W8~$W3{@u zyYM!%c_U_6=ITgNVMS6#?4eu7WyCrbR0>@eah;O|#1*y_e^`pxbyc2l+IdYy^W+o@ zMULGbYvrKNVc6;=FM_Z` Yw;-9BolZ}ut&|Nw4`k@y-l9AB3a`(XrvLx| diff --git a/cli/tests/resources/omop/private/MEASUREMENT_BAD.parquet b/cli/tests/resources/omop/private/MEASUREMENT_BAD.parquet deleted file mode 100644 index 9f18ea14831a13da85edb8df2ea98e7ebad3504d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3982 zcmbtXzjNAV7*??3#5RfT#Blh^^}JYjy*9y!_-eX+V1Bva0*wV|@qr`*wgf~VuswFl zkfGNhLx*0bj+r`i=+HkPe?h0r88UUqkn8n*zfZ^l%gUL^w$Q8Zd!Fz8`5Z2iax^d> z-~zrp+_(oE$MJkP;M>^Z7B)inb+z6%4OLe=gQK<*@CCVGIQS*c@pr!pGZ+A-@xR}) z2^M>*(eHL9vCnULfM5A+aL{?W-0jF}Z_10Gd>%}H%-KwzI@^28JzdjBiGPg-}az;w_wvf>_Up`gRbFD^Jc+==EI$E zV2MSyGOTN+dQ|TpbxggX8jb}0bH{_?zq^;7*c7e-z4dCD7fUwHirHxo+=F?)=;6sP z%XTn}M!nNikF@T{5yrnu9^e;#8@$B8t#-5JK)+x1KtI1{d;f6Q?t}YXSvCzrg_fOc z{day3nwR&(0Y588$k`&jLGtISP4a*h_ZpU8 zS0`9DKCbO>p@qKOQuTUZ6U1f9H?1Y?<)uC_`$DD6`P+?qWKv#nV(UF`59D~ zzmd%HH`7&qMj_=tM>+YKq>`Ua8-st(1>wt6@a^0>2Ll)KlXCfo%6CjDL$8)2cwLYb zA*2Ly;8IjPWcJL>=3u0_Drn1L-4uqV>$~w;H;lRfSeh5+H%e z{MRf$Ir8jNc%-uNXF~=3OJCvz8_(jQP2#8*Ep1jb>(%mcO^9P0BPAX0Nn|eHeWQgj`_l!3Gh9A0rRQBnI_cyv{o%^u#mx0 zfEvK3b>(2@8DG+!4@d0^-$_H*IgzRfm=g_pC7H6~x9Y!Fuc%ru9XyhIy+?J!=#Dmr@IP+5+u4l99&bP1hA&a@W$Z@3Tx^-+itwNK|B@S=TmS$7 diff --git a/cli/tests/resources/omop/private/MEASUREMENT_LINKS.parquet b/cli/tests/resources/omop/private/MEASUREMENT_LINKS.parquet deleted file mode 100644 index 8c2c01dd632bc96ee9b9ae7d157c652ea2a1d598..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2309 zcmcJR&2Jk;7{+%u8^?7V$1Tj_EruFM7XxX@M}rDVp|g(NCUp|mB=*J%g0;Qaq3hWA 
zBTgzII3PeADh>#7sCwXl1gZcbgb;!rdf-wa@dt3I!~rfuJ#s)Q&&+zU(;B#tm4CZ4 zvperR@AJ;uPSNxvk;zSo{3a!dL4-qs@P&Bmb5Rfk2@C)k3<3`r0$y+c_`opmgAot_ zqaX+l0)!Io0rvs|6mTC10Tmnu_k%Ea06YkefCzX9JPaNIW8f&zz@s1v9s`eqC%`x$ z0>rP6;}j$w;t7XjHX4#y9*Pj{>9Gk{S!#yp895#C&za$~AB`t}nN5u?u2?S^A6M3Y zeeI>_rQ*hw@~5BSIqe5IOeA$i@C4ORpkkSgR^6&to6Q$DibPTj1ATH1>lQ%^2mY5_F?)&Lm&$*T|+nL48LT0~P4%bd*V{bX!5+L@oMDMkM z8}8VkMCEP|0gi05stykPbW)UugHtif8X7`eVY|h`_|jdoFvVBtXip6 zH<5bUYP9U;IctX-@QtKgm$WNG+8={(pu2{$cg4OAM95f@czOQ8S>_7cVJux{0n)+! z-gMaAPF3fka<_-!;4F7Uns_}bNP}+@KlBfG^#`DTxn8vD@!CdV-l^QzqH;~tzVm5+ z9)R#}G-Yq>zQVo4pCl3&F8i5ah*ikpUa@SnBX%XqYvA-SJRD_Z9q^wOL|M?@4Rmz| zb-P+Ers`IM?K9Wn_eJHhsC_V^?fRkg9|QMQicObNGZjNf#N5h!gZ#~tX5kj6up=?!5;=|%cr8{36XR!Z{ z>P*;MEobm9Yn8Y}TJ9S*RD;#zhg zRj8!x!i7xR%q}hDFlwbRl_wqNPPN_sd2RwT69-R%w-T%w-!89RSlY@P`c}S@X%!O- zc40cxD8@VaCS5%peJ~NlKd2UdhmU-wZJ9EEzmoBqQKAR~c_D(K4Y3Dc7 zFPm95twiT|yaKCd=Wz5pol~cTP^4zoUu{{3DrW7>YrR?HxHdT+%=%Je zZ1C*WmJP%|*Zgjm8DcUWcLps3#OrG|5dU7+ow}=AVd>1Dc69mHsl)8d2A!#OJpQ_2 zW65_nb%?w3&IB=rvOF7)8_1be#+*5AV19DxG4UF+G#Spu2*i9e1n0k-Hdw#k(oOov z0?RT?Kiskbe7&t3cZY@P0;^tbuYma8?1VY*)XJe<$(JA3bL%fDB9<=5}0{5lPkU$4UpNCa zA`k#5N)wNQ3QBIvnW7{-niD@xOzxCGOD2xie*VGnn!iWkytK| z;?YC8p$&0>IIZyj*9)9Lid}L@Q#ydh%fmo`mhl8iGbmcnlns%lVo2)ug5`tkyk1Us ztHp>=DLo5S$&2*z9_qIO%|L(z@lqOSor+HrfI2-SA-$}Y(?X?O7)e67DGQO1+|TyP zLUu0UTRti@Yb6#M4G-We(iDdpzF43&Oz6SvrwKrX9v9*tRNK-~u}3lxjbRe*ooINf zy(e^Jz(n1u1BJ5n&u zeHI81Gx8~xY*)7q98!ItV5DEb(9hU`M6Y$Ym$^ejFTqN zXW#)42M#<54}dsvMu-DP9smi689OL#skoqyGTzIRQw2Ge%(dny zVg3*dD(VHIBy}w@=}*02?#xD$=ZB*sDl(OQ5!iRZ^v(Fcl=V$OCsah5Bn*Hh!z5*6 z7)ps7m^qx6Xfy_6>i@lG6f7iJ@d_F5-s&44Tt9qAL z+WP4W(AIgg*=XO3nyY;kZ=o%?#W{Zfj<(M^1O~jq`GA)OVa$R9 zb)1zr-@_5se@0m0Rw=w6iQuzTN9pkB5S}OohKZKez_C@Wj9U5H(_W`majWCwhi$mK zdkQ!2;IPB z4y3!>T10h$LWGWp5$`0F#z$l!Z$DQ%1=d`*)Aakf!V?F1p|tyY?g`?vbRmJ>o$O0jJz zE{r;3%bV(cZ!~PXme(7Z?Vc$_MJ6W1mok$dy-Ebp7$!*c!g zlU~KvNRpFf@$IqEwFbKI1f@?!?@&5e`=?$o-z-RKr|M8Oh`Q;dtNZRa0uz~(kk0P= 
zLCF901;`IBx%|Ky%l9v;_$?yhzd^i=JY%R?dKEPonJx!sE#pkrpQ~ErQ+ONa-m6jl>oVje}Au4n^icYbrheq?xoqCYQyuMgxo4CAw4QV)*&S;@c*!~d_ zyx|JcLem6z@0ufp-2$UxTQVN4qBINF^LRN-qI4clhVzLOrZ8VM^G)*_Ug@vSc8#ck z6wIbVhG^+_tp%2`8qcDqkYB`>_+<{5!gg%upBeEc!Yk1>n*g?Bl`#lz`-cTV>r>lh zX(3BbX~av!9Q`StAWpG8RjBVHUbELmJZDSsx234V&>buC$WFQbjQf!EIJuBcTV)=r zNV&@Mxzx`dmN$#>axj~mF0#?$ARpXcB%{6HG3+a9G%x4*)VCnTMT&%Kf=6JvNtiyr lMC+Y#lI(J8g*=u9891} zRb5nBR8`b<)m4;Le?Wgge?u2tbk$!FWzmHfJ?G8>n)R8^ReVp%p=bY~xh9phz za?2dyT*q)DpAbTLKES#BJ7mT0TkEL$MY~n3DV)noyaDe6p75)m1xyRrJ8Am!MCygcWd_7|t0Ral)u^^=a?Kb_$NBs244j|VMmYDh zwk#R7N#VSorJUF9k-0~fbhUU-UWjrA?OOTL`_~aDe!)Ki((4l?%D5 zHYM%>RQQg|R?z17W|0!k4f~vhXUxRfQ?+R>+2v-=I21%U51>vH#SS?RAS<@+OcN8$a2;9Qnnu(7W@ap7FYyQb z*Z1dXs{A5dl%Kn8^8ZtxmiL&N>lKU*9%JfRIJb#r4<^fusKnr z^aP(Glpf(HjEA2vdWJJWFrx|Fc2V-`Bsl(uc_Ner zA)4G11cXl=G5?O4@z7PEg5$WNm6~Zy*2XybP`C?teI*v|6qpS?K|J>O72d^^HNK7C zOWaimWx9oUW{}%E?J2ngat-CPm%D`&?F&6YsE+uZGg1jO8l{5L1LS@vYI5teSF-UN z`dLUm6@;htupnR#4$L4zv~j@h?1ms>J)cWxz@wwYGi51j#y}sQPY+n8pAE-WybhWD zI9`xdh}UH=ABqK~Y`B{XX$^W7wa%v^6Gi0ZLnj{wJ?0}Ko~SADYFEkvpF^!YCEgW` z4?EtBcf%Tsw8~}~s7(({I+h;lrOqGtRU)HuI1h&1;hegh{)9{mG9^RSNB z5xpGoG3)b&{tq@mH{afpjrBrt|nPu8&E+7)r5U$R(~oH^bV1 z(Dacn;4rBnfT?f#{5kP$cO?Dmu4+7zJDo?8uDAQ!J^0-Q@88?Idyj*UgYfYbd Nk9_GSBniI^{{`9oHFE#} diff --git a/cli/tests/resources/omop/private/PROCEDURE_OCCURRENCE_BAD.parquet b/cli/tests/resources/omop/private/PROCEDURE_OCCURRENCE_BAD.parquet deleted file mode 100644 index 277b5c25538944be1d44d4b7aa4edee99e5ca035..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3665 zcmbtXJ8#=o6lP?{iB-i#5>u5>q3|LCL7M2{q>Tz>xYXOV9Z8M#;9^K5Wy_{ShN7g% zW2X+8ilRfN4jDQZ9x`|6*db$ogVCu&hYUUE@*z@@iCQ6~%lkUt`OXtkh162)D$B5e zT^N`<48w3-j15HN%vv<^nWn3vERA&eP!+|IuFC^aKI}`w z6-+c8a>t8c^!jd;Usrul-d%TiL}w_*RFe-E8g+i%2kc_a0drB32h!o3VSldqp!|Eo zF)TczWTVU(_ty;{q*v<>pSyik>UDdvzSyk4{SxMNFvbQ=Pv3koY7dN_F?B`h{kji@ zA3vE<825%fwLol6n+2cVH!)Y!C=InP%#{ zyC8WOHUt-4s@#5$GrKX{7{5T9Ad6#XVT74zEPAng1Lm&Oak*t%EO+&u$}QJKxsU3a 
z+;VM_TfQ%HOEyC8DxHs8vej|R_crb$n;Exs_X_{85{6$H;WsOr3{*!XL~6BXjc?3e z7I_;?;B%kk2q9JY;`1#b6T%;n0wG5vdqnqmo{&A9WeF+c8`kz4R*!H;1u@u!Yx@(p zmK8yqJg3*Hu+EiHLzm=XIstJwfb|q6q>M^%%Nn2Mo+J`aV3jYxZGfUA=oFWPKfuB0 zq-&bcR}@jnoOQPA(^k6PmHLID)WGv>9!)D&wi5~9MXF_L-!4pNrV>)bvl2h;r1uq} zRG$j<8CENu83E=*Dr7VP>`!zP2`c^_j=X5CdA>$>hjf=t>CnJYEbWW|<5X(yD9!Et zF}zPeN4nK`nG%2zSPga|wQS4ABga(!6i3r5bp0Ghsjr;1o3$?RJprt#(0Gh^zz^*8 z@XH-IcrU5Eh@%US#!qb4k2`~!CY7i<(j9+o`s;|~6Ju&G@5iUp$egwtf+jR~x?X?f zCSE=~vGKB|ed-*a5B-&{kAyN=ONo8RyOJ`2IGl9S5RWFG zY^D{e->6vrBzc;|yv@I=F3d-I%bSlL`=OAaz*1@JRSwT{_y4k;_y(@?)u5?S!+AL=JRt0 zrNm-=DncFF{v6u=v-}D6L9tW5B{Lbo8BUVtO}^AK?)af9FW(RA2^-f0oeQ|sUZlg* zq9IS8GLbEG2q2^XgF7U zTJ_vRPt~f2s_NfRReP%XC#-ttc@I5TRn?hi@M2=DCb7U~-kbT&%kBf$Nbp0B;dVZXSXQ9FZvN_yZU4Zw zAIK}#FYb7lfA6!+{hBh>C*zL8_-mhs@ppG^;}2}#(qt?tn&E``*9+#yEgL3e!x)#E z(s9Llx#bbkulIl|i_Se83+Xg~*cDP<-SYtXeLKSLS|B^mO>t}tjLQ||&dtk5q z9)bO{8({+$FRV?jJ~5|~(zy^-bCuvfc0GW;3D`h;l%x$He?9O3`Szg=^4Lk!2DHBe z4z%#wG!xp^r88wDvX7xBqrSBkj0VaowjA!PHkY;Xu3s$Ktp%)|ja3cZ4A55#HTtOh zaxd};_y+I0pkkMy1-M>W3pY*{-ZhW4z4`nZHUK`{k2K1ZBNzp0Ts^DRU5`Ups0EpD zB>d*O6OX&*h~us`)3_TBG43`~i@VX7;;u87xEl;2?lzN$yVa=SZaiDK8x0fgcGDyH z7QUUio#2bRj~FQGP=M6yUkYDX!zA>JKZe(*{Tv~r25-EM2$>PSPYQ(eNw!bdc%G1B z{FEi+5T76~pCH9?Ndhx6$K_HD)c8{{CpnbMGf>YSqK6Zb2h|$NpQBL^Z+Yq(H^@;n z6^j85sh!frGA`Po5mLl>iUqUPtY35mb=XW*G`ZO5Ny$t?8dcP0s(scyXsCdHimA2N z@JefV)b&Bb6q|Jy3_>oe0FoYNVlfOiU!Xx|F+iFKMZ;7f-?>z53`HTKcGb+hn`&IP zl8v4`D(JF+`>0<--Uz9jEb+4Lc;}$RpNq|k+JZOa7>9flQX(_#Cey=qlaDu(Dk_n~ z>Sq>TBu`JOxRh&$?);&V3xBO-#)Nohav`I&Q3w3TE@CnI@fnq+h0K#Px=Q`ge5gpO z_>hIP1{lsbOY@H{{s>QtrgXB9^EsM7zs4sP4&lF&g@P#;)wn`4$z~V+v*g&eKXT^} zR~_eZ4*aQrWwa?)AL^16X?N&^lH>j$J|zB;O{t}ND6z37(xA5t>n9#l^PM@2K@kG+JC*pKEneAV| f|AWC~yq`!tjX#Zt-@xyJ|MOzb{R~rue?I>KoRJ16 diff --git a/cli/tests/resources/omop/private/SPECIMEN_LINKS.parquet b/cli/tests/resources/omop/private/SPECIMEN_LINKS.parquet deleted file mode 100644 index bf6850239f9d58d95a7688f6a36a8e60ea398d4d..0000000000000000000000000000000000000000 GIT binary 
patch literal 0 HcmV?d00001 literal 928 zcmcIj%Syvg5S`qHT2Z8kHzbfnHwG8=QB+XT#iYddqZYL2Mq(Ojwbhua0hg}*0GBRZ z`U!r6pW+udbEC8tap#a^?wq-E=FCk}!D>nBGB3xr$-yW<&ei}x&@DlZN~5UKS(&v> zol#Tgya5wE5dnh*PU@PL5zdQdOJU7JHgB0johS^N?eOxXS(n1ljVH}`)|>?=_ZOeV zT|o-gkS9lC`a}*8_Pa>Vq*7UAHzK%K_yFBqgUrc#(5W{v`f=R z`kPW~c^=GcCpZh+zSL=qaq;6EQlzgb7`kO!#eX?G=ii1kUg@1WIpa-?0a&&Xki? zsfbK1)e7gxxjRR-%}UU&wt|bp#INmc9us<3q0)qRd8#uN9k344v(CE6_PKd#-~6g| z4}XW!?tQR!c<$Fqt<%UYE$VomU;l}eEioXqltoOfzIm)q`y@o+lwng8=J!2ITs7t`Cm zn0%Fbvl9UE?XCwAHnq`sX!y(g@1p?Yo3LjbQRI=B(w0O_ZQhxD#ZW~eCB5ke|U(R~;3!NmW_aIu}S($xks zAH%zK5ZZW^*6q%($3f&&r;9}h>vg|)B2dsAA3=~+jUI5P335TZi{f+F$@Q|TY(jIP z*mGa)?WN7-wD&hu$lhF+bS}!p6Ynu1II3PLgX*-AuD0b`1w_{#3XEyh zojZ6Kj1R9!r^;fNFw|l;E!WRX{Jd<|rLJ82l3_yO^K3Htj0{DL2H?b<7jlerj!4=0 zpaF=N^s*4M->gaI#XxPV1N~aAOYLTA7g#d zWD_4>JtSQ%70rtx<1}{s128_hF-Z%NL{XyBiY#@@^<}9nNnOOj__Bw};7ljo;bBZg zg;azO%lAB#^7Th^VIc537 zH3x5nFG*HUnneQGk5_a9znP(Swe#_qJJ?Rr5HhRJNXHF|#;u$-6GNJBQNn zh{4+8=p5`$xmYD{idm%Av;L5QgOEc0U!EMvk9QA%1ngZ5!^h~#c-h~rV~w*E34soT zTf`c@6eox&`ppVG5Aj|veZ(7`ssGK?jrHhR2dXHint!cz2!TE}82ino5p7B{zo6uu zsM$jvPcC0yX5mUjUrQAQ+K+rmuQqAQXanNDS( diff --git a/cli/tests/resources/omop/private/VISIT_OCCURRENCE_BAD.parquet b/cli/tests/resources/omop/private/VISIT_OCCURRENCE_BAD.parquet deleted file mode 100644 index 0e2e7bc046eaf72666d4e7e14017729a6ceab6f4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3429 zcmbtXO>g365cZO6HneQjZUcf9s&c_n)$U@9(p@U`@R|TgmL=deeE5Ql%~xVDH3pO% ztE#7-dfZdhUr_Z>?XlV)up@{xtp9;)k1{@C_Gcyrfb^1zL-#^cYsJ}Cd}I6T4@ig`I|H$8*0llYf# z2Fe=mToe2%fQOQ=nS1Nhf?i6`n!D6wExl79OgIGOd~%*(it&XHQFj8C^M4V{hKz*v&c`yKxI+Z`8Bcn>H!-HeHFmT^nL=(|6e0 zHJZpA{-m?Jk)O8q7!Y|hL`3nl_O;b_qVr%9pRc-ygpdmS;`5A<84k4!938?2 z49n?c5^zZ6j4l>&(V+%-$fqEV6uzW4>L){~CU(?9rlatglrku3^|aJ$9aS}CimrX< z;2nG8A-o4(gU?JFe08SgGb0Jl5FwZBB$IUbEq-Pz_FND|x{9pP(ZwUWgIZDGmgh^xTy_&t)lTfx1qIc!&@* z6J}EVymP58a0_l~`H7_$ zlB5327W0ZuFFiq!75>M-u2q1B~iK%4aSb;Hz5DObIM{We^Rz)3x$}nkG551Y~lk 
zCzuIYi{yIzPGQgCE7cAlH)2mMKz@%5u%z)Lrwe?W9}W5nKPWmlZ}Ur+IM6uoM4T!o z*HWNse7Xb*wm|FCTJYeTJznFx9$y-tMSPXC*i(j}(L$cXI6(bFsvG#WpF_ugTR&7A zEBSKaUDsolv-#NhN6}ZXpMW0<`a_fNh^XRSuQ|-eB0hZPyaBbO460bGRA+z(A?$`rAho4U6{$txunwgyP21p?C#9Y{P*W^(`t!|s-k`p)X##kV<`U<)|Y^iP!VBbumIHr^?Sg*0(%Sg4(uj1 zW@r-TAa&?XQh$^}>gZP8ZFK5w&)@UAEJc3}1_OYc zr|NiXF{(*8qhRO(@GKldCzdcX*!n{BG6tMxa-M`av-M%o*S205nvZ z(47XVXH!IzTW-a>bf^q%zb8(8f5FK=JqkiMk%+43aXmfLzl1TPH#gI6tJe`#F%^PZ=w-Oqs__NZ7|+hOu2-!5^*Q z;ZcUyTX@K&GW#eEj>#%K_!4KlXgGN4i|yBylVl-h*A8lK-EP7@YZo!q+!)p~mdA=G zyAyhxY)p^yS;wi1v-Lu!v|Z@ulZ6z&D|-Z`8@M$`(C~1uJz-`qc&@GR`A57hUcd$T zbcA#1vi8O)TIXlUgNst}S>J~H?80W#=6XHs&kpu&ID-2E=P}U|O~-%L@!HF7v$^c} hexsM@dVZ_gs3+FeS5{Y6^>mz&;71<7rz8g-(7)JZDp>#k diff --git a/cli/tests/resources/omop/public/CARE_SITE.parquet b/cli/tests/resources/omop/public/CARE_SITE.parquet deleted file mode 100644 index 18da482a3476066d2155eaa5bf34662eaf46975e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4084 zcmd5A0CuH5x~W2vUVK77J!-J5AcKyK(bs8c$Q@=}qN0X;Rl|Lf)xZ zAjE%9MvXL;rc!>ssy48!mY|AY^G47^Q#jb94~SCDJd0Q1q@4%iR3SfH%W$1=hzUzle@ zQD$v36!aT{@GOGZgd3n)Lip&zn-C&5 zxPlZBdb=frf4t`*e7|SuM)Rb5dNOGC$7irKx)(;XH(Uh#h82Eg0kUUv$-|A1H~FV- zQrE*PfTE-vW|k8h1E|OkQo027r?oKecar(oOXh{%Y0`9l?hAb76MkP6Uay9i4Bdzb zl6nXLu*{g;$NUqo!ZA$rgCJ7)3qP)VS=z9=hCV#&UbtB)1~lQ#1L3y^VbFS8PhWCg zkO42qL3h-LdW>~zWCx2*e|aeU@bEsI{<+~f{oq;?-01uDhVaeCZN5XFMVa8*z-Tu+ zI`4-(KVui$-)7y_Zw&7TrKK`IV-CYuEVMUY23z=+bh%1s@9y9n$p9M)hpcyVc7=cE zyb1SuR^F=pg?s8~;eMMm@ZD;F71jekuRdkq&<1_|;1_Fzi*RyGjz!~~6yM$3+vB2K zshD~W_lU&Fl!}}r$}tYMFgn%C0sS&snf`U|)BxZEk%ELC9%TKy9)y&Fj@jRC=o=0Bpw{{e>lq;8y z(}%@YPA%rAikWrp1Z2oeZFP|+=gp)9`jXO#w(FJhrD7&!#RM6%B!Z30e4z{btfUg~ zQ7kOk0^h}24rI{(6#T%xkgLh5nO2Tp>eXYbDx=+!>r=`Xuj{CV-l$fQECb^JecN|( zk~eGMF9|piQx^EOS-x08j1#4k5*wL=UhSeRYE|%i^~?bO@j;^^jTY_ESY}R4j!nRF zrI(#49kp$A%BO~GiHN@*DbH$h`LxkVfxZ7WhQ%6Q&tcv?&l%}y8}Zg<4Pq$I8Xe30 zzs2G7-c=t!AI|kocByx)^SK_KsgvHMo=#y*W6g?RYSjZ-uc**t>bTxfEhBS$s#mao 
zuKGk=swwTWIh)gbp*N_@YP*rP#&s*+)3DFoUWT4q=;eqyZd4D@zg(+KC2hX%|4IJo z*}A{}hFOrtc~Hc$a)d)f*~5JsNs`VFoKYllw7b2H-ZYV{P!DS{fsL`Cp zJJkr}^g`=lPZ@GtG9bs*h}N!W$`;CNM=$`Vmq5^6GDV%(Lys;|h9qi8%v$vTbk$xX zeUmN4z)@O8dlX-mg33?>9zF%1rKe-4iKtiNqpED3ljOeRpCssz%$m;L#Y6QyJTRY^ z@zgu1p$1|wZ$S>_l|3-Bje>R($pF^)+`a0!xaGW=bvbHk)r3g#9lCfNd|(7#4gj@A z5)(p8;5bE-uD`CmK}s(wx6q3iJH1-?UI2RrQEUcKyXZ`CtEcxr9W{r~jb87$-tTuO gTjOSb0AFZZQsjmBLJV2()v)wqzV|cC5q$6e1sjU!pa1{> diff --git a/cli/tests/resources/omop/public/CDM_SOURCE.parquet b/cli/tests/resources/omop/public/CDM_SOURCE.parquet deleted file mode 100644 index c8f6c17025c81043cbce7be8b1543bc99f52eba7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5823 zcmeHLOK%%h6rQGSTBebJ7Mnzhs%~NlmDZkdoMw=^xOV)Go!I!jiRKkQGGn`b+e?;+ zAHae|m#E8%6{=VwgjgVc04p}^_yH_9=guS7apKYxRaM0(_KfG8^WE>f?zze#$qSxq z9?r9}4`1AEj^lj3P0!lK7Pq>w{=TZ2NBz#Qr|L&-+0;ENUe3Gez3bzAH$U05?7-qI zz_(XigpU~nSy6iWs3{Mcoi>B?<&`B+KdiV=JVG@(hNkxz>!&MAtiQjy=~=N{xnsN0 z(+yqj>qnYAVBPuV>JpfruDLKDAc+ewo?Tml@#}RL!;2Urz1N309I2hQs&@xRO^tEt z`Sm48f2_J8xjpKrvNAN}-qZnPP<~upg7VjmQxrQin%-A?&F-1>{B~mr%HKC#l)DZ} zM;)4adqC3+p*q%kI=IO2`h8jLJmp=)4>3;qz&ID=3w{Z$XSZCCTTHMS4qx4xGvD|n z#I3LP)rM}$9xvxv+ge#D{w;2Q6N?>X7nL=u!_=$udIe0+E5xm}eFLFUzEEMT@Pg8@ zNS#v+&cev~wv(XKG_&HD#JC33NZGH9-P(vw~W|3A@jI=eER*X@;3H&auuk2+!#l7agYG!fsse5l zu+R>4TGeT$Pz0P}S}|jTS|wv+C50>jti2@Y6%i9LrVNKkhIAWRB0rVODM5=P7nff! 
zVrEB5qBKz24j$zO_7+E%x8<9VDFBy3H4W=vY$gzcbT-!=R?C8+V!R4Do7Y(%l8g`0 zam0)Tv_z^~ZI=R;FOGj|x++S2tt<>3y!klW>o59zLELG4EM8}QfH*lmKtEE@*?z{pW@@zvJ(3?(g4vN) zNwsPviS{4U*V7EtY%MMvtD+>RX4a^Pv1YZMGBnd@XmMktoUMJt6B{EX4z)igJ4w43 z!IYuIOAS>l9oOQqKy8jw)#kKB2lh&fcg;}S*{&jnj-l6wRk1YIVp6w;eP#Dt2KHCY zlv-8Tvy^yjs+T0OR!Pm|vNUA;oE(tuObM2zfB~FJvuZHaRTFu@-e&fj%sA3^V|Aj4 zS?J}`Py-(juL6G6%+j=04z=ce!o3LNQVJ+B3FN@rECJslgAIGMnLSbF`Te(hG96WY zm`vdgj;#cauY)m+u}BAJn}V||f@5v3gYyeDNu18R06v0%kZeKn`*-0X8lyuo?959P z;zjXk&1et{Rm2+EBmQH*pBz5&`%yhtq?0s7_30yhisECO#=y|xlVgaH3Ar*9s5?}U z4ugLGtZ;vB{nMdKF-UkD?qMdEQVV8)p|tZ>y&7}z2UihK3iwCB4-CV33Uh+r)L;6O ze2zD_enqHlF*}tjVVZZ)?4bRVm-&bBPt-8zy z0!Vm1zJ>&I1!@lXR73;0)ALCLQ*512;1vneLrSCoha#H4B*l-=_|r#JvYfW?A^!bZ zIW~iR;RI$H%>l_Mwml)0LwKb5tW%JE3c%zOWeK-mpzbF|wSs#H=Aww&apVW*F}T}T zYUc2}puVs)Eq-!b>O-%>Su2tRBmdOpW7QWN_TW0@6GJ{$eYy1k`$7}^6y>tj!z8nZ z>*q)~2d#j-RO?&^#oz|%g8JP%2cMp?a%c*Wl+WYG-1@z)+Xu|Nq?`-`(+KIc+4udiOs5 z@Be=9_y0PYMn@Jtc#gl#`|iQq2EWR$2K^qK)mk6-u4x3* zH@4i+x?L_eC;ZkG@_peF zLc-dsbO-JMIF5HCo(*^aupbiaf8JnVZw#8l5g@g|`}7Ur@9zH(VC92fIE1Sv&sq zAn(yV*Uz|S_{XPR?GAr*`Tpb*KK_yL!W(4jSnq=49Pea_yn~ON_{Ukl2U{eGjqLb; z%=iy5{Nwqc`NYv-p4}8)z6C~L8g2S72MiFF4dB69KW$MV8$r*UD*k5C86t@RvJ)l&P{Q zc0_9RaIVn$*H?}nuCIX0={-=s=(AZH`fuK59`0L4Z}acJ`h+ih=-;ndLMU%bP(zcW1@osKW8D*p%wUj~4Drpu<;5h~P~L#Q)Y1ktH)^0!(2 z+d)-t`mu#S;k#gvU)OVmiOEraGK2~@t*H|&Q{;VB;rBt|H^Cq%a6Ht)4`~12!|CbI z89(N5(l9w^fS(C3fBJpHDe!HC^RJGSlV-QZ`+>B(?h7luw#rJYHg>FJOG_;A-d#Ub zkr@x1bYW(`yLtH8t>8VRP*Zhr4%F7o9NANjCgBj_W+*1Z8wmhIXbFguvvT}8T*joq z>^gV3!=P~sGfKN9IiEWa3_QQ`cP`;7-jjqYezy^>_#;d;u1@5Ng_x3K2>!_7?&3ZLIK^nblVykW1jBke?@UNQP`cX!tt z@fPy?_u&~9y%KFhUNI!?cwx9BiIFf!^lIJa$Sdx6LtZKJfC`Bc?c(sVKc-nk9SgmZ zGohkhil%8H_Ro(AHKwGHX%j2)j?1i`E|wpdcC%$HVhfRoNyPS5Vj+W9G`{FBy}cV! 
z;;u=`sF%i~#Tv$<8HX74I=u2ZZnqfIAlCxXUPq~A%vM<#1Uq($N_Iv0ys)@L}GVC$_tZ452dLf2D-%n0m*CshJZ zLQs%yC9yg7#4JoF66wsWKbPAnNzQlDnS3Ywc(3&!-Y%qz#a!|*-%88*%vrph(w_um z$XssqaZbv#6C&&vlv(pzErv*MJLsy8mg6|lX6&|Hp-9M)nZhtl+MH|p1qd>JcTpFaWQ5SChcxw zCa<4|*p2h@BwZ=zLP}RXZIpAWqVYzt$g@GaE}e<>QUo}7g}!_Wa98G;d;#Uo<6U`L z@8$-Lq&lsu(P>4JMWy#ht#@U$EU6t6$B}q1rYb#E({A$^B3j=Sl^$Ua-IA z9-(GA2KX<;5w^)Aj!>g4ALBhLQgH^Dtak)IwBE5esXE}!w5+(RrAVhN_DoOq*M}tbQ2%Mw88+KHXi&3K_PN)w;Y+*cW@jZjbUIN3uh2g(9i$l}F+|a|c zv7d#sKq@#s$4x2B7qJX>ae?gRqublN&`HEWnvl4WZ*Tyg*a#b|Z#dtPq>sYe+uC-T zgqp@UTcDZhkSe5I+S^B|q{WZU>*3-Y{NcFK-G}=YKp=}aDYWrIUy3SuWYd?xvk$5R zMN&g$DcY`pE~Q&U|H8N+ePcgKP<*?_INPWCD5+XfrJ5b^L7ZA*f{>&9dm4W!jZcEq z#t!nuEO~tNP~#)pa>l!Sf2JRH>`e9jBsSZgZf~Pvazde@QS|fZxQ?wQUczz+Vq8?j zDB7-FEysWpwL_8yUJ|24;?NjhWd9DHWU+taK%GI(wdw__4~_fk{Jv+5b$(?8Y2r7D zL!150`3Z95xJsEr$AKZ5#-KA`ixz)(j4Dc27~*x;jlm3n!+z7@T1O_7=AzWekkx9{Zxl>JR8LtE$SPi>iyNscXme&YcGXwx=>4+vEE>-#zy`-#Hh% zNXjdbixDm|SAZ9Ho8vg1k45H|mbrzc#jv7UZCP!|{hq82j7D3D%tg6qEP9XU_{$&0 zY)i12hWPe^hw;9Hq3#d*LsM59LqpPyh9ViN5A2Hz0cbzW!A`bE_fAJM+Fd{5H**1q zf6ROSElwkLIBctmtv8S z&D$LZY1lV)2&g4#rk|`YF9pDSf7x@$CCh<1Q5Rna3FFNb4=Y@hjJDb{HcVY=YU)$< zz^8{-R{|V*d({I8WqWep>=~*)Y5)3ako`4}m+(L{TkT$Bp!Y4cCxOg;PQ1Pr0PA}qal%rc7%9Vkt5Bt5z8u8{S0M_MFu#N+ zV3v+Drl^Df%*RwmgT#J&%?2gU^Nsw+|JDe zudKOc!w(u*4~{{elI>h11pA$oz{^}AHto-`%yif}mTdO;ME=-29Zv~&AN0eob^rJ= z5xbK}mZwg8(-Q<|PmDEW_fL`q7v+{>OGn{Zt||U}_Y!~pi-2FWudc`O8MShRkP7^GPo_maA*4=di98{sZ;zKsgtFGogLr&Bi_L5a zURy0}ujAYbnr)E{XpZ2EJVx^Z$q+*Oq(#V6>^~d9(oBVbZGnJk5kWHR@FFkp#Xxa_ z5UXf`f6ypB!9@U#4{Nek7Uwp_ft=J5LNeW{r>X~Xw`$gt)fZB&M0D4lWMOwg9=Lr6 zZn@iN%lq?+8@wwhXcAmaS9jmL0aY*FwYTs>U3``HY0k{Q|l;FI%3s1BcXB}sMZ0~=DWi5z@VnjM}~;JR~O2=LTcMm zgsqND$izZuma1b0_#Y*ajdOE{ky;eNnhoxu5Mi$Ewwc<9R5>1+;#>F5DB9ZwO$ zT$A(rD`HYJg%mw#rbMmT%|Yy$>kumD2SB>NA(X|GeLj+9q0HfVS3Bv+c;Zfe{xKdj z*X+VsYikKn*eZwuqn9XSJhsWm?VqMsj9rfILL9Ybubc($cI(}J?Rm1ghMNvudq=Nv zx9{K;aFI{{dFgn6DnE{*!sfM)}Ig@^88se=Yclnrd- 
zXj!O&Zm3pB>|4!l*=nNebO%q%2LsU?G^AYO+`K6e0mV!R?2rY?|~XDMtC*!k;G8n<=R9^j=N9h|Bpv>aGakx=!9z*c`=k37{7uyE d_wKKx9wizoCif;fOw?C=6kR&hiejfew`w{;Y`z~HzjwemzM=gE5@J~pZIwAci4)5=*M zD`zUzQn5+zrbRlikXwV~?*Sml5zubl6P``*{$IPmwUZ!9Wh*`rkoP(W{xZx7jxjf? zrJ|ZC*V<|RK19A90-7AB1NGT7XO_(Th*H^#PXy$>4w|=)ahh^_w5zprrM9l6YZ(@G z-L1Dr$aRc62kd}18SR6zCFg{Ahp|&DQcZjxOaja zocNDX2>uvtMB&En*2f5{N!CX^)Z87&kIhiYo|-J8H;G-N6`u&mdmw1#`Pn<1=NpZ!>Q-&Nq^k8wB?C3Sy~2O~HhJU%svN5W z*)(!(SvK`Uz|vh=P%2yTiGaMa+$YN=iUmZo|3@D%y^6tLxR9pI*HUm(vu zdDkV58hJLZN7&%gNayQ%iN=ikX^I?9^~|^zQ25|9S14(vYG%D&gm9TEJv>c5J>5&G z^ND?4kcX@4W~Pu9-vJ8K1KlSiUCfUpdcX4opi6shxYMJNJcLoSf`+H=Xw#0!jgzif z_!hoKK0Wr1R=}qe=PtPJD%nIrWu`?-5N$AYR6=xGU29nvn=PGIJGwGe5Jv&rByXmL zhzosy!#$#Gd?MMLF6Wc6{+)CCayBLXJ5Q4T;S5M#(HV{O{ai&}$+_nx9A!wKk00b; zpD0KlPXVOAJ2&b3ZcARm21;Jio+v#WlWb%}dSG`5g3~3B(pc=H#0UBgDc?Bm#%qr* zP)eh4<8^`3U8>}0h|(Ml_BQ6?9+C5lXh30$% zZ)yJ-cmT)k-QMucZ>N%qhhgE+VspNMcX7Lk7i6Rqbb#&;Y+#-M-c>CTs%zoAC&TC~ zSKJL2zW`l5ns3<0lB&f%oS^Ke#dt9}7n@hZ@$8z<>(IhIX@LmtFWgHIVg4RZT8K?kMh@X=zwiv3hK-o6$N?SWt0c*Cc#2-v!Z zH;WGw>3A&v7B9u4^m5a_XpTjhEnQ>PqP{SD==q;YZtiZ(L9Q0!S#>V9sTR2%P+vgL z8rB)(3S0)u*m{SeffpNTv1b*9hCS^$yA*_(tbDMQOfT*mePca!}TV~`fs&gU-wXYShdT6cD;VG>3TkY zD9DrLbmdAtQ#+%U%V*M+N@;fz{sXCQm5P(zxwD?L9yk@l8T$Eug5z~TScQM2{{dT) BvQPj3 diff --git a/cli/tests/resources/omop/public/FACT_RELATIONSHIP.parquet b/cli/tests/resources/omop/public/FACT_RELATIONSHIP.parquet deleted file mode 100644 index 93b22f7b7bc26ebe5d0ddf0ead81048f4e396752..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2167 zcmcgu%}*0S6rU|?TfVeFXWcbDR8uz`0NVly7&y!lO3{K)(b9&5Ed8K`e%Vj)VmN6G zXHR(JL_8Qwi1Fx&lYfGG;^^5J6Mb*DWeX`J80(~OX5PG=x4+--&1|~Bf*#6K8?^^u zAiacG9iO&)$UYb)RG>Eos0+eoYIcY`MnoQC0_kt?2qB&QCg@!pXD2pJe#<$iNg5?) 
znHs%&q+?15GJ&|5n<#J14(0%STs$i;?V1>ZanK;({+m zK8Wi)K2W*8;^K7PB5oFcfm}X!G~uGxMdxzVE`P&=bU7|g>IzV+Ru|jE#8)es1lxU1 z$Qv-g?V!%a8eml9F^)nR9*Quc*T7`i+yD9d8j%mW4aC<+8k#`UDy z6kVo7vD=D-o+e{6>B?GmKBq1#iLiIP9QHo0#(d-X2;f7g+Vc-^F9E3Ff$a|iU1^CPH2fHb!5dZ)H diff --git a/cli/tests/resources/omop/public/LOCATION.parquet b/cli/tests/resources/omop/public/LOCATION.parquet deleted file mode 100644 index 49f8f3064e909be0ba3f59f5043d0156e67c9602..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1865 zcmcIlPfrs;6rbHLrNzb&!fbZU9(rLyg5Yk;pK9V^wv@jGX%z@uNZHm>SZHkv4UHec z58%NA@kERtz=H?k$+Mrplkwoe58%P~W=p#zsWDV1%*?*uy!rj!zW2&*@T^4@I!s5? zu#pi$LWV>LkzrCoN2q#V90zI8&=f>91WmQpMOX`Kh5-jv5J{?sFl`)24*dQ2Tr@_V`5x+}EIbS~pxA#~+;2l2k#=!o?aaR$Vy}jJ>F6(Kh!(6l z2ZiGtjezKw+|{ClMgVEET4N+_V7nnI3hW0kw|=|z#(t>H6lW)2WtoB{50ZqT$b??Fu+*H#iMSZ6d1Oh6`+GF3b)1z z?mOGrBjv^iu@Dh8~*rGet)626LZai$khBM`kNSE0~|A(0u@PVrxNmo=;FfB?$V0u z#Mhs%39k(D3O-mMt|GqD66PJ5I-;qha1tb*KRR$;gK5|HW9tX`MfYsqeOTEQ^=M#y zfX6EuI(hNn9sZbT(MqoR;IF$k*ILQpA)gJjTyAO%zgdI+=#y|OZ>hqEo#*p>hhnC;ikae&nJa@Mf#VcfNXjqrmfM;m zj<#b}{&&^{DmpjCp9sC0-15OQ+g%4@gD(yI^aSRqm&^`sDqG3&?PbdRWIQrDJ`zvH zQ)Al(W85T%SDY*`g>X52)s+Hr%=X0C9DAN~?lwyI3bG0DZNGUK64 z81Wb!UYGjycuYlRJ3GoMt>3VEH#7BYf#G-IaC~HRC^hBsSxG)GsVDQ*Pc87-P)1N0 z9LwqR4T;~B{Cu_Wdd%_3>(%V!MmLkB+x=@x*Rqlo>)6NV2Tez7G)3F^18nF& zYgy{{T9){9EtB`JW&d-#*@ZQ3*6@s*{pfNvd!@heo`44 zUEovRd->2`!aqOr$@Sxp{l0eywX42hGw_l#^Bf&YMdQQM`96^+pUqQm&Qq^Z!VOyl zp26h?ZxvtT=ko=0jzp-Pu^!O&#~F)&f_Q!i)CfX|%egxuX zh^DQUecSZcvEN~#$p7cBLXx$Lz1!Y?6YTci^gkh45BK%G(}PhjZ#c3Q_r2e`^7rqa z$M7qmeMfP>_{WK>|Gj9TxNCVsM%-4R0E!grlnk89@fJ8JTXY=E1+3V))af*D-u_J% zD765JD+?XmoNnUMY$(PClR6w%P4e3&^;n7ek=>!>urZ6}obXt=&CgM)=xwvDZm&pb z370cMGt*l|d=lOUhfoU3^-!b`Q!mR#8W&~yKDK8#zAdXd4@&Z3Nqv*6Cpj`|=*+T^ z(`76> z7B~CecJGnzOA^zdrA#Yw6-yOXQh=+}3tW7>B?A}ik-8|VNL;rs$Q7vC8E-MTWH0Ti zmOB?K(6g38TBYg=D$j&QZ)w)QJ$;L|1@(=G9eiH8d>2ea665i0kZaI$%FXsbr3u3Ul33SOoOe#|rD% z*l2nT~poi{0sTHpQ0}SC}k^pFO<)SqkDVLsBnQsP8XD 
zIEGFN$KY~@gHwecUWx-3?(=zxk%mV@@Tx!&a0A-6ml<_bI1)+q$G0VhCPf8plEdn; zW$M0V4#?!Ji!Wg3#3P2CdAgl}!Ty11Lm#=+Ke=W#bl0(nT&JC#&LxOW<;#ue6f=z| zW||}0T^Sr+ENsK}!Ym7JuaZm!Oxo+Un-!*BUtwfo;l$u5QZW>bj*pDQvB^)Jk4{VS zIV?cS)mN7z8?)NDSCsx$?cla_S1Xwd`{>a42)q%sKaJV>YV~OKte6FP*w1aE2v)pC zcqEe)eyYDF>6h?ON%-~G_-U=#U01ucu|~Y6!f|tt0qLW)u7dQqB2{AYgCSbN@&a~7 z>r-CA{!1NK&nDl>XX>Z5hWo{$sAu?nU6MbP)P1$;ff`4a=6eP2A0ESciM~qAFi$o8 zOOJZQGaGDWcZydsC4V$J5Kl&UIV!x;lzqt8Um7Q9ouB1|s1MFKY*pI)382W$P@nVo zQCaGLzSu!uS}=>8_^gdypOI_(WPBcEvv-K}{2WWEK9ap6=~|5SLFY62cuL{S_F>!x z2WBniO6I4pdt%belF1y+k<#?A94d2^-LcQv_p&+p;xzZ)F6Q#I|79PazoaAY9G^zN z#8cZGpPAITJHOn+kwm?RQ^p*h1JwJrwcqz$R=p1!r+j&_98#_FoyGXiy)uVl)4IDi z_N>;oQRV0ZHFQ0tt=BZI3q;rBnl`EV`n48K>(`q4#XFx*(>m#?Nz>Zt4u0w`7>ulf zm=>dH&G^%rCn2t-Xj)4b#I()C*ORG^Nr=M&Ec%HLkddYa(i@`>h~grjv_#;kni^ue zHVgkc=;gGlm_(v&;eo!kU?Nn%VImgl*clEsdU|_y*M%XmD}eY`?OcTqOkCe0lr{-f z;*qpzK6>=k^(7OvxpZiqn+T;0RtA~|6{5U5Yb*Pc5*veFhFj(Xsmr*UU5bVrlH@m^1GN2I4q>ztC; zkKnt?g@D|O!OiD(Y9`puh^7)I$sl0e3y=wpPb0RtCjF=txF(2)KPFO z+_Mrj+PyN`ikR=huc_hgcr}fR7Sg^Z9lz{)-AtCU>owfkQX39+5BK$mVd{4@bnkTE zLj{+Km39nLha5@)Y_r0h{B<`L5njcLD>glnSf{ zxTmjn^t`UGDYrgqwHf+$_IXzB42OK^lLTs|X?;5WbpD$=)4VTTPf@a#QBPuN0j_({ zv*WrBO&h26Lm($s8fsbqBm6~ci2>2fy65Y2`>WB?Hp5@kyZ{sULf@pmo(esB{fkGY zln+Nno*a*lRY!-1t0N;LLzCqb_=#X}C{{CPB$X4tu8eQTe$$#XjuC2PVq=y>)vn@xm zHT_F%_E0w1!w6&B!x$};Fv1?j=)=fE7~3d$>cbd$Dy4){3e8JH2xF8H%G&Qc_ud@& z`Z^>u&Lvm(eCN;a{J!t|oj>scPmWQQ&eBAJ`YG`fGI&)Xgt*}Cf>(vt4R1HRhv4D`B$7*D_!-9(5R}s#ysP5j-QX3)IeI-C${S&0Sw_iWH zm2Z^l?Oe0GwYgF*QkUk}K2x;23ac^Co6Tj zP_N+Hm+-e=a{aG0N#f_vl8X;7l9zwX(Xjv>=;VyObQSyH1kwV|N5>7$UGB*nzlFrH zn};N8tux4lt<6HI*0h5CH$}Upuzz;5uifB+#I()e=)$gW|0hAnT%3^9j8KRz+}NtC z*oMH;+7<2vFA)E;R}$~-BwoxlOF|z;)NdSS-=IPymJQjF@+6HVs7FM7;AhpH5B~qv zk#6#iQjWcK3#9(be|+hA&rSZm;`tWq_o&8WwdfwJFHGuCyW6dYU4(t|g8)@c(f8Ti zYL=^pYxfoHYlYqNu`iAUbO+phW^;7aLwyPA60WMgv3EpP{8Z}{6xQl0{$lK;1@sRO z7=lv#gx&6yDS4rjdvm*1TCoZ0H;VQLg?-+`ZuUTSB&KZ!N6MqPCXDr~!``E)4=%Y6 
z4HGg6gy0#Ugp6qPWQPm5p@ISlMh<^QF3w*&Ri@qp-Vv_P!r-VW5vsTBUvGadZqesswPyjR{qCl=`I)=dtpT zF2ara*87%-wR#&lB z$4y>jUs_P%9W=A=RtfvJF4KSfQKW2Xe^=4|sIWUH*k4XS+9epKU0Tk3cPSP5K!|Kg z#ahW#T2Sg#>h%{S)t=U7x#_IV8zwI*YT|52Md4(KW8>M#; zFFUbPpuK-cAgApL5@6+;g@L`4ScIh^heZctQ>JC9WO|j6fgg zg(Mz#77*%%9`=ouP~WavV_O>{3EZXxrqc#vwAk0P)qn5mF=p9U%bn9bURnmw-c*_h zn!i8LfA65~vj=kVJotOzL0;gV_buu<-F%+oUF3UTFAn%Nzz*&NKj1@`J+pI^J+HIx zKn^VJ!(*DCd9>^PK0^8fx{oK5uP^_|IKK32-XK0<9^;%Z!i&#OIKRRpYkZpXH6C3P zb&&|?uVE?5c^toBTVJrov8DjSU=Vbh$8OO@7|s{4+rlDrk6yqT*7+3YQ)oj2*xOLx zaWTmSEMfzHedj_ji00HZ^cGNPU{O2~EJnui)yio4+Ho2Nx z7NN25IDZ32Mz$>cND^({z#qXM3!G>0XJ7nt+4cNvs)BuZGzLjHbJ@Z(`Ci7R4!k9_ zi`B|bE}dM>hvqlH@9pgJ1o~df@@U2!Z}N*-d@cV)kUQ`e1m5yQtvDUt0N*#V>Bwk0 zRJqLOV__ryJYpny1LFzZr=rgJKxssb_@~0_g)ti*7x}>H47}+j3(q;f9Ov4Fw^N** ztL3W&j0aG_d_?2Hwwd3z5gOIDD_(|w)#*lYIklQe&#&e;Q_V~@wasIBh&Rp==k0|b z27{=K#kfdv0zn=v1hx%CIowL0OHLGK05cSBWkRVd5I>CnxXH(i zKj*l#V00wvtiLyLCc8eeGt;A4)Z(&e#)SmI%{*FnFa)XuRa~tN*lBYQ9pa)n3Wf>swa_+VE#b_zUU4*txT# VXGi<*!FBlw{}JvZWC8xt`yDIKP7eS8 diff --git a/cli/tests/resources/omop/public/OBSERVATION_PERIOD.parquet b/cli/tests/resources/omop/public/OBSERVATION_PERIOD.parquet deleted file mode 100644 index f0eddab8431a99c6075d4aa9da9bf803fae9533b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2183 zcmcguJ#X4j6n!{=NXigJeU`0Ki6vHtd=N*drBa7$52xV(VaEcbJNNbwRnh%$RLQXxDYmM}^lkHoYyPl!uul=){>g*Pj0tJN)c z>di*Uv0C+}S*n{{k|p_*M?Upv5iM{Xv$}xN7?()REDk8^e@gv5}o$f zNi?B~mm(3m)tKnd)2`?T6GU4Lb6oP*fcB9Z!Q~vg!_AUc-c6EBsN$tagl;t^`9si^ zET8vjr|(#$YO_(boX+L0IK8L+nnATL-5B{KwW=Luioadmw~oqu277v5x?+0_=G=Z9 z)YmWnd@y64?@00NBPq9nS%JVALQ1P=k0)Ug0eV3nntXLBUWz+ zsu6=uS-OY87@2>KH+h2CdVHPc2c#mU5{v?cUNOJnRP^kgnX-GuTSs%vVv3`Vs@Ee{nCG3=2Z;!3cV%2dL%dJ+k7w+O)6<_}0 XmDQ!lQbgSiF*f>Rs*kZP`~>+6((J0K diff --git a/cli/tests/resources/omop/public/PERSON.parquet b/cli/tests/resources/omop/public/PERSON.parquet deleted file mode 100644 index 
ae693885e0fb0302ecc507e0848168ee9591bf73..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5420 zcmcgw-D?|15Z{v(S$389BR=(%Dv6uMiy=-V%W)E=33yMk{1GL#RYzYGj4a9Whh(X; zEX95*eJCM>68aPGVX;Qd=QiT+6S{Eu}=0-C}!Lu zBBBW)bixNU{f6LT2HH0o|g#8Y4<2Gxj4!y#-3zdKHX9i;4c-qC@P; zq*qhv;#@MnnoJi;8q~Xj{k}kNw$NWp09MyjqqiYgx9HYLl?V|8>!o`mLhAq-zc&Iv z2>JEXkve*x}i*-)@ zEYQ!}=oVy>!jVDWHgg6kY2XwQZDJ>85aC&V&H_*@Jv>tg{odvjiKEX#9Fi@S*f3%^ zXTL~q+b+Qj2aJM0HB;T<*=>^ECHF4`zmuVIdY{dRpI`-D{NdX#8dR568-ea9W4IIQ zJY8B#)khsp(X5955D)uYDp5+8mU3w={#yclagcs>5UHxu*ThN#s(4Wb(N!oyboh|i zjm#G{Bm*$LafBbgR>e1U9O8t7r>~CT`4Z1A}O>rrJ>| zneho;@K2=$g6(Rz5c<<$z1lm9iDbIA*6#@Ry8`{An?CM_T1R~iweB&@2X-B1Pr9_Y zx|CchZ8XOEP^7oS9b+{tq5`bAC)UoS#Bl@r-@rP|QqXHNQ|b2ltLIaXy+-K82@ym^d1Kibsd?iJFX zy*k^H5m3$UsZ`Wo$LZ7KJI>M_#N_5%IzwH&vK#XA;|copiDvvbUUUu<$+DJQOy?5f zF|giesyeqQ?fOFk5!+#$al=()H5(SJTQJbDf53qTL^Z2*Ur+FMmvaD$43;|HZ(KGF z{fM!4DN~R(Tsf4k>LOJnhLVA;5VST*?LHE8A}Zc*j5|+#tshY}%?o9EeU#M6c8kO5 zc+s&3r^h`#?a}=@e$jn96a9~458by@$g3VK_V6%3_jm)}qYan7(jH28V_T%VvD49g z*q!LBZ#een9d-+~+Mjgx5cuXG*;ptv6}hNvKhmRi7hb2BpD{KAH(pm5t1vmk0*qyt zFT-WB%-B_Y^D#DopU~Dmp>+WzZkWago(@t9gI!mgG2GhT+=QWG-Nr%g)8{PiR#6r$y{JL9`*z} z{`w9+=6@V0?yETxFMQaU0)t4r&r7ZHI!f zQN%tjXOpYf%JWtDG(Y3h=UuMz%U+iYV=$_a2yA16KluZd$jhfyj|JTEXs9sHb2Wnn2&oA5hJ)~`)*lF8pQ*4FELG@{Wkr8&J@EPjy~9zD zHy8Cx7LuzWjsf!taBx>68vdE9Jg7WgRr%&eR>ua*^Dx(_$3Lao_pymhc;)%#SNP+q z^!PG2!W;(vHjFxA=pXnQgMPdw*o|#GN`2PlKdRR2(->FiDr@uA`~?tD{25%oAayY~ ziu3rQ*GDa0q*AWLqm!GEH(G4~H5J4QdU&d#0wurM<;z6sEhLKXt*1+;lZC?RM6sB! 
j^p@e*NBC0OJ2-U4eZ~#POE|{9{69GgzdKCB@1*|%$I7%Q6dt>q)Jc<&q*-?zmMRitghX4%4uJ;I!}vEzT@#1If0t;p>o`sw$Box9 z$*JN{aR8|lgg9{E&_fR#P{k1;1U;Y%RXrgg^@I>es6ql&oOy4&_O92Piqa;lcz55M zneTn?Z)TN6wiFNraabHnz=Mnu;*rPa3CY4wB;u&(J;R54#a>wwP%8;I?j}Jl929eeJ071?KrC26}3>+TXdUA|HMOoI}UVBEE}>T-Hvz$ zt~$jNwUU5$1T-ZpRfy+<6DC!uO;z8k7FKP1o8f)fPw(^tUlY&<@5pyTe5utw z;Qn79ebYn8hwh*C?|Zt#>}icip4MD2;-Izq0%3nNB#VO4Wg~50u*-DlH(ta)xV-mW z^w&Xpb5NEX5D&29=oIu~0_qsI2=rk{95r`_*91@=m#|CVjouoO#eRdC=Sp3v=p}uR z6I%A>>BC`qZ&-#Uj%GS?4S3!0LR*k)ykjy<3EtZ64Y~g6q?zmI>Xnl2oZ>s9^s`Y& zv58|tJ!G2idQF;=Qc?8nt&+N0E9nQ6~Xz#X6KbD_`y?dukd*hu|;%NBBY5M4egADr~&Jd}$ zrfw9r6mcB9o^;#J>B)ga8S>6YtVd&0W1d7!*=f^njvTnh!f6ghySJ^$vBdi$@)>MK zBKE_S_WscXv^TP0rZXbuR^A*wxJ-#S05+V9W|AaiWPvlfQ*0f5OhSxlUco2(9vsq# z5j~s_nM3*bIOLCoqd4G?^x*yfXXzoHfAk2>EP52@41G372Yvd}fIhV*_c3$4I%7C|}Dfa&3Rw6M&gFg1npd0e!Pw z4Ue0iOV^d`k^=@~dy4=vvpMVcBi(3>lMEw4?g&RiSHa(XHJ;v7GJ*11d3JwoI=z<* zrq{KtSWU~|IL=RD+!#wPxA4Nwc$Z;P=(3VcmUHj}AE&_IfEt+HTno-@=Ch%xY_N=z z*!<$_&3LgW&s4l6r{VRkek{a`W?|WoqY>_Z$QDm592R;<%f#whygXIlo`l1O{V==K zUVqU!eyHup$7P6L8L;#`$0|ZyvN*4AB#l?m$R|>XYRNPt3+o4@mcKQeX7M+NvN|0q z=YoKnnOWE7%8h*5<~vS`?E?}o_on}`WkZe{`RAm#D>$8nu+=*F+v2&|iMJ5x#OsPD zuct;nyRZ-Sv#w;&UgQ!ivg9`sHRP7RyuK{{@>aC^i#X8eK(@%hBu_2UXS`mp*!eib zS;cts;I*Ah6}i*)evF`YF4@RulGU|Lx}FU#ZD{co+>Zc1htH0SNE20M=SsbxpH-{X ov&zoSc4J}}{y9?Gu1o}`&rO}1@;!i0xBv1-hE5Q&2>%587s*=0S^xk5 diff --git a/cli/tests/resources/omop/public/VISIT_DETAIL.parquet b/cli/tests/resources/omop/public/VISIT_DETAIL.parquet deleted file mode 100644 index da20bd662a52d035bd75a55b9ffa10a3c043faaf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3228 zcmbtX%Wvak5cg(xyA6$mSa3+B2=!7+2<;|K6|q6u!#K}&OSds>UYE$VomU;l}eEioXqltoOfzIm)q`y@o+lwng8=J!2ITs7t`Cm zn0%Fbvl9UE?XCwAHnq`sX!y(g@1p?Yo3LjbQRI=B(w0O_ZQhxD#ZW~eCB5ke|U(R~;3!NmW_aIu}S($xks zAH%zK5ZZW^*6q%($3f&&r;9}h>vg|)B2dsAA3=~+jUI5P335TZi{f+F$@Q|TY(jIP z*mGa)?WN7-wD&hu$lhF+bS}!p6Ynu1II3PLgX*-AuD0b`1w_{#3XEyh 
zojZ6Kj1R9!r^;fNFw|l;E!WRX{Jd<|rLJ82l3_yO^K3Htj0{DL2H?b<7jlerj!4=0 zpaF=N^s*4M->gaI#XxPV1N~aAOYLTA7g#d zWD_4>JtSQ%70rtx<1}{s128_hF-Z%NL{XyBiY#@@^<}9nNnOOj__Bw};7ljo;bBZg zg;azO%lAB#^7Th^VIc537 zH3x5nFG*n6&0IIvgJ8F$shw67c9N!PyR2*CmNc1A(%dBdNSc_Y ztu6a85JW^}zht0@43Q}!GT4x@AAX>S_{$$dWQYuZi0sGwz#jvL?|W{Nlib|y1D0NT zd++<6bDsOW&-?Cb}YL^dNCJ3N4uMOI{22pA!2TmRpW6JiBQf5NkwF}5leY)8hz zYGAAq`kqBFwrh-21t422SLvQJ0b}|}8~w)&gawig*O0JX?haubKv;4m#tDfAIoU4R zQ7Cr(P+xMf?WQ+fE&#cELg70G6^C9Sl(68f zCKODMTYKt+Qs3mY|0Tz8`!&8XX(zM$*xW!)7XA9*ZV%Y*{h8tC1cj%2rT>Yqu z2BF&p>H!Q^)289Z>y`#BmI=*nn3bAq(+NOi8-#F&a_l1^S2YHuF&Se0te`YD*XOE& z;*gv!=e3re2wQo0Yvo~!Zt>Ztbw?Dsr6+}!@9wk}J2Gw2F1i)F-h`axC_&w!J7fI^1|2%8oF`!f@ktgt0Z|%d%0%vdo|5KA(@V zqxj}$Y#g6pSD#>c(B*+)YzW;)!qCkZhT$xT_9S-qAH^?3Spb@{V2|+cE#Sxie}x8k zof>qz0f&W8aAydeHEMu|e1ef;$~#$zPfsi*Vv&p*9L}V?!vm@OWNvyeF*iFF%^_0^ z?RX7u+!znx4e;XL;gz^Ix~7f}7Zccr`2y~Y+s$8Jz~BCG#rF9k+>5L+(CU@Mmv}5v zPEJQW$TP7tu{s+a-k2SXt^;;P&5tgtu@lfMjN^Fxo_pdux;PuldDLKZaoU?$pY;x8 zn)!~5H}YjC+^d|wkIf-7%uWMep5M54bUgtafpc!!n_I*_kgF%%?q~4D$CDn|sMrBN zC+*{*^H=P|7!LETC8j4fX1zn1WQ_QO9y?z-s~U z7827wPnDZG^|V$mEWqPgtJ>iL|L6pQ)YT8~HI{(-$pJ=mB{4d@7>8KZV6L2i5pz>e zG#E}+%Lmpp3#gMoCV=$>md~$|)a!Q)+G>0gu_4G!WKIo2e#c5MlIzEgRq-qQxaf8K zVA4VV3ctZg57apDM4YH685@AQ#w*KU!Up;L)EowU{o^{mdVRS*Ree)~k-5|o)M#+j zgF3+chg8S$S-D1P@mK1H*G65w8t|Iy(eJ77spKDvK8))L#F4^yc=BsS6gHl(0nA5L zAMfx&9OhCgAH})K>kRNf(kxV;wmxz4yx!RJMCmOiOE0aY%Lh`$;(=tTR9Nd>g}+$m d3roENg9kkaJ Date: Thu, 21 Dec 2023 11:47:21 +0100 Subject: [PATCH 26/28] Raise more appropriate exceptions for missing file or directory --- cli/src/pixl_cli/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 46c95f10b..ddf85882a 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -318,11 +318,11 @@ def messages_from_parquet(dir_path: Path) -> list[Message]: for d in [public_dir, private_dir]: if not d.is_dir(): err_str = f"{d} must exist and be a directory" - raise 
ValueError(err_str) + raise NotADirectoryError(err_str) if not log_file.is_file(): err_str = f"{log_file} must exist and be a file" - raise ValueError(err_str) + raise FileNotFoundError(err_str) # MRN in people.PrimaryMrn: people = pd.read_parquet(private_dir / "PERSON_LINKS.parquet") From 76c3897a57a3e5596418a77b9ba0c3c3e35b7529 Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Thu, 21 Dec 2023 11:50:38 +0100 Subject: [PATCH 27/28] Print log message about messages created during regular runs --- cli/src/pixl_cli/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index ddf85882a..b048376fa 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -382,7 +382,7 @@ def messages_from_parquet(dir_path: Path) -> list[Message]: msg = f"Failed to find any messages in {dir_path}" raise ValueError(msg) - logger.debug(f"Created {len(messages)} messages from {dir_path}") + logger.info(f"Created {len(messages)} messages from {dir_path}") return messages From 845e82685e2ccae85e56549e734a72ffb12376f4 Mon Sep 17 00:00:00 2001 From: Milan Malfait Date: Thu, 21 Dec 2023 11:51:38 +0100 Subject: [PATCH 28/28] Update error message for parquet files --- cli/src/pixl_cli/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index b048376fa..75763b8b2 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -348,7 +348,10 @@ def messages_from_parquet(dir_path: Path) -> list[Message]: for col in expected_col_names: if col not in list(cohort_data.columns): - msg = f"csv file expected to have at least {expected_col_names} as " f"column names" + msg = ( + f"parquet files are expected to have at least {expected_col_names} as " + f"column names" + ) raise ValueError(msg) (