From 735eb9fa3219ebe5e11ce0b07f9ee7ec0270c07d Mon Sep 17 00:00:00 2001 From: Denis <104531605+dcanbwell@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:49:34 -0500 Subject: [PATCH 1/4] attempting to fix OutputFileValidator so that whatever views are specified via ignore_views_for_output are not used to generate the output files and schema and are not used for comparison --- .../test_classes/validator_types.py | 4 +- .../fixed_width_pipeline/v1/output/books.json | 110 ++++++++++++++++++ .../v1/output/my_view.json | 8 ++ .../v1/output/my_xml_view.json | 110 ++++++++++++++++++ .../v1/output_schema/books.json | 48 ++++++++ .../v1/output_schema/my_view.json | 30 +++++ .../v1/output_schema/my_xml_view.json | 48 ++++++++ .../v1/test_fixed_width_pipeline.py | 6 +- 8 files changed, 361 insertions(+), 3 deletions(-) create mode 100644 tests/library/pipeline/fixed_width_pipeline/v1/output/books.json create mode 100644 tests/library/pipeline/fixed_width_pipeline/v1/output/my_view.json create mode 100644 tests/library/pipeline/fixed_width_pipeline/v1/output/my_xml_view.json create mode 100644 tests/library/pipeline/fixed_width_pipeline/v1/output_schema/books.json create mode 100644 tests/library/pipeline/fixed_width_pipeline/v1/output_schema/my_view.json create mode 100644 tests/library/pipeline/fixed_width_pipeline/v1/output_schema/my_xml_view.json diff --git a/spark_pipeline_framework_testing/test_classes/validator_types.py b/spark_pipeline_framework_testing/test_classes/validator_types.py index 22a2cf2..fd28c93 100644 --- a/spark_pipeline_framework_testing/test_classes/validator_types.py +++ b/spark_pipeline_framework_testing/test_classes/validator_types.py @@ -735,7 +735,8 @@ def validate( output_files: List[str] = [ f for f in listdir(self.output_folder_path) - if isfile(join(self.output_folder_path, f)) + if isfile(join(self.output_folder_path, f)) # and file without extension is not in ignore_views_for_output + and f.split(".")[0] not in self.ignore_views_for_output ] views_found: List[str] = [] data_frame_exceptions: List[SparkDataFrameComparerException] = [] @@ -764,6 +765,7 @@ def validate( if table_name.lower() not in views_found and not table_name.startswith("expected_") and table_name not in self.input_table_names + and table_name not in self.ignore_views_for_output ] if ( "output" in table_names_to_write_to_output diff --git a/tests/library/pipeline/fixed_width_pipeline/v1/output/books.json b/tests/library/pipeline/fixed_width_pipeline/v1/output/books.json new file mode 100644 index 0000000..f9f87c3 --- /dev/null +++ b/tests/library/pipeline/fixed_width_pipeline/v1/output/books.json @@ -0,0 +1,110 @@ +[ + { + "_id": "bk101", + "author": "Gambardella, Matthew", + "description": "\n\n\n An in-depth look at creating applications\n with XML.This manual describes Oracle XML DB, and how you can use it to store, generate, manipulate, manage,\n and query XML data in the database.\n\n\n After introducing you to the heart of Oracle XML DB, namely the XMLType framework and Oracle XML DB repository,\n the manual provides a brief introduction to design criteria to consider when planning your Oracle XML DB\n application. It provides examples of how and where you can use Oracle XML DB.\n\n\n The manual then describes ways you can store and retrieve XML data using Oracle XML DB, APIs for manipulating\n XMLType data, and ways you can view, generate, transform, and search on existing XML data. The remainder of\n the manual discusses how to use Oracle XML DB repository, including versioning and security,\n how to access and manipulate repository resources using protocols, SQL, PL/SQL, or Java, and how to manage\n your Oracle XML DB application using Oracle Enterprise Manager. It also introduces you to XML messaging and\n Oracle Streams Advanced Queuing XMLType support.\n ", + "genre": "Computer", + "price": 44.95, + "publish_date": "2000-10-01", + "title": "XML Developer's Guide" + }, + { + "_id": "bk102", + "author": "Ralls, Kim", + "description": "A former architect battles corporate zombies,\n an evil sorceress, and her own childhood to become queen\n of the world.", + "genre": "Fantasy", + "price": 5.95, + "publish_date": "2000-12-16", + "title": "Midnight Rain" + }, + { + "_id": "bk103", + "author": "Corets, Eva", + "description": "After the collapse of a nanotechnology\n society in England, the young survivors lay the\n foundation for a new society.", + "genre": "Fantasy", + "price": 5.95, + "publish_date": "2000-11-17", + "title": "Maeve Ascendant" + }, + { + "_id": "bk104", + "author": "Corets, Eva", + "description": "In post-apocalypse England, the mysterious\n agent known only as Oberon helps to create a new life\n for the inhabitants of London. Sequel to Maeve\n Ascendant.", + "genre": "Fantasy", + "price": 5.95, + "publish_date": "2001-03-10", + "title": "Oberon's Legacy" + }, + { + "_id": "bk105", + "author": "Corets, Eva", + "description": "The two daughters of Maeve, half-sisters,\n battle one another for control of England. Sequel to\n Oberon's Legacy.", + "genre": "Fantasy", + "price": 5.95, + "publish_date": "2001-09-10", + "title": "The Sundered Grail" + }, + { + "_id": "bk106", + "author": "Randall, Cynthia", + "description": "When Carla meets Paul at an ornithology\n conference, tempers fly as feathers get ruffled.", + "genre": "Romance", + "price": 4.95, + "publish_date": "2000-09-02", + "title": "Lover Birds" + }, + { + "_id": "bk107", + "author": "Thurman, Paula", + "description": "A deep sea diver finds true love twenty\n thousand leagues beneath the sea.", + "genre": "Romance", + "price": 4.95, + "publish_date": "2000-11-02", + "title": "Splish Splash" + }, + { + "_id": "bk108", + "author": "Knorr, Stefan", + "description": "An anthology of horror stories about roaches,\n centipedes, scorpions and other insects.", + "genre": "Horror", + "price": 4.95, + "publish_date": "2000-12-06", + "title": "Creepy Crawlies" + }, + { + "_id": "bk109", + "author": "Kress, Peter", + "description": "After an inadvertant trip through a Heisenberg\n Uncertainty Device, James Salway discovers the problems\n of being quantum.", + "genre": "Science Fiction", + "price": 6.95, + "publish_date": "2000-11-02", + "title": "Paradox Lost" + }, + { + "_id": "bk110", + "author": "O'Brien, Tim", + "description": "Microsoft's .NET initiative is explored in\n detail in this deep programmer's reference.", + "genre": "Computer", + "price": 36.95, + "publish_date": "2000-12-09", + "title": "Microsoft .NET: The Programming Bible" + }, + { + "_id": "bk111", + "author": "O'Brien, Tim", + "description": "The Microsoft MSXML3 parser is covered in\n detail, with attention to XML DOM interfaces, XSLT processing,\n SAX and more.", + "genre": "Computer", + "price": 36.95, + "publish_date": "2000-12-01", + "title": "MSXML3: A Comprehensive Guide" + }, + { + "_id": "bk112", + "author": "Galos, Mike", + "description": "Microsoft Visual Studio 7 is explored in depth,\n looking at how Visual Basic, Visual C++, C#, and ASP+ are\n integrated into a comprehensive development\n environment.", + "genre": "Computer", + "price": 49.95, + "publish_date": "2001-04-16", + "title": "Visual Studio 7: A Comprehensive Guide" + } +] diff --git a/tests/library/pipeline/fixed_width_pipeline/v1/output/my_view.json b/tests/library/pipeline/fixed_width_pipeline/v1/output/my_view.json new file mode 100644 index 0000000..8005829 --- /dev/null +++ b/tests/library/pipeline/fixed_width_pipeline/v1/output/my_view.json @@ -0,0 +1,8 @@ +[ + { + "id": "002", + "some_date": "01302017", + "some_string": "me", + "some_integer": 5678 + } +] diff --git a/tests/library/pipeline/fixed_width_pipeline/v1/output/my_xml_view.json b/tests/library/pipeline/fixed_width_pipeline/v1/output/my_xml_view.json new file mode 100644 index 0000000..f9f87c3 --- /dev/null +++ b/tests/library/pipeline/fixed_width_pipeline/v1/output/my_xml_view.json @@ -0,0 +1,110 @@ +[ + { + "_id": "bk101", + "author": "Gambardella, Matthew", + "description": "\n\n\n An in-depth look at creating applications\n with XML.This manual describes Oracle XML DB, and how you can use it to store, generate, manipulate, manage,\n and query XML data in the database.\n\n\n After introducing you to the heart of Oracle XML DB, namely the XMLType framework and Oracle XML DB repository,\n the manual provides a brief introduction to design criteria to consider when planning your Oracle XML DB\n application. It provides examples of how and where you can use Oracle XML DB.\n\n\n The manual then describes ways you can store and retrieve XML data using Oracle XML DB, APIs for manipulating\n XMLType data, and ways you can view, generate, transform, and search on existing XML data. The remainder of\n the manual discusses how to use Oracle XML DB repository, including versioning and security,\n how to access and manipulate repository resources using protocols, SQL, PL/SQL, or Java, and how to manage\n your Oracle XML DB application using Oracle Enterprise Manager. It also introduces you to XML messaging and\n Oracle Streams Advanced Queuing XMLType support.\n ", + "genre": "Computer", + "price": 44.95, + "publish_date": "2000-10-01", + "title": "XML Developer's Guide" + }, + { + "_id": "bk102", + "author": "Ralls, Kim", + "description": "A former architect battles corporate zombies,\n an evil sorceress, and her own childhood to become queen\n of the world.", + "genre": "Fantasy", + "price": 5.95, + "publish_date": "2000-12-16", + "title": "Midnight Rain" + }, + { + "_id": "bk103", + "author": "Corets, Eva", + "description": "After the collapse of a nanotechnology\n society in England, the young survivors lay the\n foundation for a new society.", + "genre": "Fantasy", + "price": 5.95, + "publish_date": "2000-11-17", + "title": "Maeve Ascendant" + }, + { + "_id": "bk104", + "author": "Corets, Eva", + "description": "In post-apocalypse England, the mysterious\n agent known only as Oberon helps to create a new life\n for the inhabitants of London. Sequel to Maeve\n Ascendant.", + "genre": "Fantasy", + "price": 5.95, + "publish_date": "2001-03-10", + "title": "Oberon's Legacy" + }, + { + "_id": "bk105", + "author": "Corets, Eva", + "description": "The two daughters of Maeve, half-sisters,\n battle one another for control of England. Sequel to\n Oberon's Legacy.", + "genre": "Fantasy", + "price": 5.95, + "publish_date": "2001-09-10", + "title": "The Sundered Grail" + }, + { + "_id": "bk106", + "author": "Randall, Cynthia", + "description": "When Carla meets Paul at an ornithology\n conference, tempers fly as feathers get ruffled.", + "genre": "Romance", + "price": 4.95, + "publish_date": "2000-09-02", + "title": "Lover Birds" + }, + { + "_id": "bk107", + "author": "Thurman, Paula", + "description": "A deep sea diver finds true love twenty\n thousand leagues beneath the sea.", + "genre": "Romance", + "price": 4.95, + "publish_date": "2000-11-02", + "title": "Splish Splash" + }, + { + "_id": "bk108", + "author": "Knorr, Stefan", + "description": "An anthology of horror stories about roaches,\n centipedes, scorpions and other insects.", + "genre": "Horror", + "price": 4.95, + "publish_date": "2000-12-06", + "title": "Creepy Crawlies" + }, + { + "_id": "bk109", + "author": "Kress, Peter", + "description": "After an inadvertant trip through a Heisenberg\n Uncertainty Device, James Salway discovers the problems\n of being quantum.", + "genre": "Science Fiction", + "price": 6.95, + "publish_date": "2000-11-02", + "title": "Paradox Lost" + }, + { + "_id": "bk110", + "author": "O'Brien, Tim", + "description": "Microsoft's .NET initiative is explored in\n detail in this deep programmer's reference.", + "genre": "Computer", + "price": 36.95, + "publish_date": "2000-12-09", + "title": "Microsoft .NET: The Programming Bible" + }, + { + "_id": "bk111", + "author": "O'Brien, Tim", + "description": "The Microsoft MSXML3 parser is covered in\n detail, with attention to XML DOM interfaces, XSLT processing,\n SAX and more.", + "genre": "Computer", + "price": 36.95, + "publish_date": "2000-12-01", + "title": "MSXML3: A Comprehensive Guide" + }, + { + "_id": "bk112", + "author": "Galos, Mike", + "description": "Microsoft Visual Studio 7 is explored in depth,\n looking at how Visual Basic, Visual C++, C#, and ASP+ are\n integrated into a comprehensive development\n environment.", + "genre": "Computer", + "price": 49.95, + "publish_date": "2001-04-16", + "title": "Visual Studio 7: A Comprehensive Guide" + } +] diff --git a/tests/library/pipeline/fixed_width_pipeline/v1/output_schema/books.json b/tests/library/pipeline/fixed_width_pipeline/v1/output_schema/books.json new file mode 100644 index 0000000..fe229e2 --- /dev/null +++ b/tests/library/pipeline/fixed_width_pipeline/v1/output_schema/books.json @@ -0,0 +1,48 @@ +{ + "type": "struct", + "fields": [ + { + "name": "_id", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "author", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "description", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "genre", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "price", + "type": "double", + "nullable": true, + "metadata": {} + }, + { + "name": "publish_date", + "type": "date", + "nullable": true, + "metadata": {} + }, + { + "name": "title", + "type": "string", + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file diff --git a/tests/library/pipeline/fixed_width_pipeline/v1/output_schema/my_view.json b/tests/library/pipeline/fixed_width_pipeline/v1/output_schema/my_view.json new file mode 100644 index 0000000..4ca8e5e --- /dev/null +++ b/tests/library/pipeline/fixed_width_pipeline/v1/output_schema/my_view.json @@ -0,0 +1,30 @@ +{ + "type": "struct", + "fields": [ + { + "name": "id", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "some_date", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "some_string", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "some_integer", + "type": "integer", + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file diff --git a/tests/library/pipeline/fixed_width_pipeline/v1/output_schema/my_xml_view.json b/tests/library/pipeline/fixed_width_pipeline/v1/output_schema/my_xml_view.json new file mode 100644 index 0000000..fe229e2 --- /dev/null +++ b/tests/library/pipeline/fixed_width_pipeline/v1/output_schema/my_xml_view.json @@ -0,0 +1,48 @@ +{ + "type": "struct", + "fields": [ + { + "name": "_id", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "author", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "description", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "genre", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "price", + "type": "double", + "nullable": true, + "metadata": {} + }, + { + "name": "publish_date", + "type": "date", + "nullable": true, + "metadata": {} + }, + { + "name": "title", + "type": "string", + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file diff --git a/tests/library/pipeline/fixed_width_pipeline/v1/test_fixed_width_pipeline.py b/tests/library/pipeline/fixed_width_pipeline/v1/test_fixed_width_pipeline.py index 960a42b..06f72cf 100644 --- a/tests/library/pipeline/fixed_width_pipeline/v1/test_fixed_width_pipeline.py +++ b/tests/library/pipeline/fixed_width_pipeline/v1/test_fixed_width_pipeline.py @@ -13,7 +13,7 @@ SparkPipelineFrameworkTestRunnerV2, ) -from spark_pipeline_framework_testing.test_classes import input_types +from spark_pipeline_framework_testing.test_classes import input_types, validator_types def test_fixed_width_pipeline( @@ -59,7 +59,9 @@ def test_fixed_width_pipeline( spark_session=spark_session, test_path=data_dir, test_name=test_name, - test_validators=[], + test_validators=[validator_types.OutputFileValidator( + output_folder="output", ignore_views_for_output=["test"] + )], logger=logger, test_inputs=[test_input], temp_folder="output/temp", From b6490cd6e42385de066da47885c5f2b392453224 Mon Sep 17 00:00:00 2001 From: Denis <104531605+dcanbwell@users.noreply.github.com> Date: Thu, 5 Sep 2024 15:04:50 -0500 Subject: [PATCH 2/4] changed to the proper way of stripping out the file extension to see which views should be ignored based on the ignore_views_for_output setting --- .../test_classes/validator_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark_pipeline_framework_testing/test_classes/validator_types.py b/spark_pipeline_framework_testing/test_classes/validator_types.py index fd28c93..3f3e4e3 100644 --- a/spark_pipeline_framework_testing/test_classes/validator_types.py +++ b/spark_pipeline_framework_testing/test_classes/validator_types.py @@ -735,8 +735,8 @@ def validate( output_files: List[str] = [ f for f in listdir(self.output_folder_path) - if isfile(join(self.output_folder_path, f)) # and file without extension is not in ignore_views_for_output - and f.split(".")[0] not in self.ignore_views_for_output + if isfile(join(self.output_folder_path, f)) + and Path(f).stem not in self.ignore_views_for_output ] views_found: List[str] = [] data_frame_exceptions: List[SparkDataFrameComparerException] = [] From d66715c4eeef9b0fcae53169a6a671b543e65d69 Mon Sep 17 00:00:00 2001 From: Denis <104531605+dcanbwell@users.noreply.github.com> Date: Thu, 5 Sep 2024 15:29:57 -0500 Subject: [PATCH 3/4] ignore mypy's unnecessary complaints --- .../test_classes/validator_types.py | 10 ++++++---- .../v1/test_fixed_width_pipeline.py | 8 +++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/spark_pipeline_framework_testing/test_classes/validator_types.py b/spark_pipeline_framework_testing/test_classes/validator_types.py index 3f3e4e3..e40261c 100644 --- a/spark_pipeline_framework_testing/test_classes/validator_types.py +++ b/spark_pipeline_framework_testing/test_classes/validator_types.py @@ -732,11 +732,13 @@ def validate( if not os.path.exists(self.output_folder_path): os.mkdir(self.output_folder_path) # noinspection PyTypeChecker - output_files: List[str] = [ + output_files = [ f for f in listdir(self.output_folder_path) - if isfile(join(self.output_folder_path, f)) - and Path(f).stem not in self.ignore_views_for_output + if ( + isfile(join(self.output_folder_path, f)) + and Path(f).stem not in self.ignore_views_for_output # type: ignore + ) ] views_found: List[str] = [] data_frame_exceptions: List[SparkDataFrameComparerException] = [] @@ -765,7 +767,7 @@ def validate( if table_name.lower() not in views_found and not table_name.startswith("expected_") and table_name not in self.input_table_names - and table_name not in self.ignore_views_for_output + and table_name not in self.ignore_views_for_output # type: ignore ] if ( "output" in table_names_to_write_to_output diff --git a/tests/library/pipeline/fixed_width_pipeline/v1/test_fixed_width_pipeline.py b/tests/library/pipeline/fixed_width_pipeline/v1/test_fixed_width_pipeline.py index 06f72cf..11babbb 100644 --- a/tests/library/pipeline/fixed_width_pipeline/v1/test_fixed_width_pipeline.py +++ b/tests/library/pipeline/fixed_width_pipeline/v1/test_fixed_width_pipeline.py @@ -59,9 +59,11 @@ def test_fixed_width_pipeline( spark_session=spark_session, test_path=data_dir, test_name=test_name, - test_validators=[validator_types.OutputFileValidator( - output_folder="output", ignore_views_for_output=["test"] - )], + test_validators=[ + validator_types.OutputFileValidator( + output_folder="output", ignore_views_for_output=["test"] + ) + ], logger=logger, test_inputs=[test_input], temp_folder="output/temp", From 0a2300b3f3a1a0aa1a905fe20448363a73a19747 Mon Sep 17 00:00:00 2001 From: Denis <104531605+dcanbwell@users.noreply.github.com> Date: Fri, 6 Sep 2024 08:07:03 -0500 Subject: [PATCH 4/4] changed the default value from None to an empty list for ignore_views_for_output of the OutputFileValidator (previous mypy complaints turned out to be totally valid; my bad) --- .../test_classes/validator_types.py | 8 +- .../practitioner/v1/test_practitioner.py | 1 - .../bwellproviderfeed_08122020.json | 180 ++++++++++++++++++ .../response_text/input_schema/diagnosis.json | 24 +++ .../response_text/input_schema/members.json | 24 +++ .../v2/response_text/input_schema/output.json | 162 ++++++++++++++++ .../response_text/input_schema/patient.json | 30 +++ .../v2/response_text/input_schema/simple.json | 12 ++ .../v2/response_text/input_schema/test.json | 162 ++++++++++++++++ 9 files changed, 598 insertions(+), 5 deletions(-) create mode 100644 tests/library/features/v2/response_text/input_schema/bwellproviderfeed_08122020.json create mode 100644 tests/library/features/v2/response_text/input_schema/diagnosis.json create mode 100644 tests/library/features/v2/response_text/input_schema/members.json create mode 100644 tests/library/features/v2/response_text/input_schema/output.json create mode 100644 tests/library/features/v2/response_text/input_schema/patient.json create mode 100644 tests/library/features/v2/response_text/input_schema/simple.json create mode 100644 tests/library/features/v2/response_text/input_schema/test.json diff --git a/spark_pipeline_framework_testing/test_classes/validator_types.py b/spark_pipeline_framework_testing/test_classes/validator_types.py index e40261c..5033b87 100644 --- a/spark_pipeline_framework_testing/test_classes/validator_types.py +++ b/spark_pipeline_framework_testing/test_classes/validator_types.py @@ -639,7 +639,7 @@ def __init__( sort_output_by: Optional[List[str]] = None, output_as_json_only: bool = True, apply_schema_to_output: bool = True, - ignore_views_for_output: Optional[List[str]] = None, + ignore_views_for_output: List[str] = [], output_folder: str = "output", output_schema_folder: str = "output_schema", output_schema: Optional[ @@ -709,7 +709,7 @@ def validate( if not table_name.startswith("expected_") and table_name not in self.input_table_names ] - if self.ignore_views_for_output is not None: + if self.ignore_views_for_output: output_tables_for_writing_schema = [ table for table in output_tables_for_writing_schema @@ -737,7 +737,7 @@ def validate( for f in listdir(self.output_folder_path) if ( isfile(join(self.output_folder_path, f)) - and Path(f).stem not in self.ignore_views_for_output # type: ignore + and Path(f).stem not in self.ignore_views_for_output ) ] views_found: List[str] = [] @@ -767,7 +767,7 @@ def validate( if table_name.lower() not in views_found and not table_name.startswith("expected_") and table_name not in self.input_table_names - and table_name not in self.ignore_views_for_output # type: ignore + and table_name not in self.ignore_views_for_output ] if ( "output" in table_names_to_write_to_output diff --git a/tests/library/features/v2/doctor_feature/practitioner/v1/test_practitioner.py b/tests/library/features/v2/doctor_feature/practitioner/v1/test_practitioner.py index 15410e4..58f3b2b 100644 --- a/tests/library/features/v2/doctor_feature/practitioner/v1/test_practitioner.py +++ b/tests/library/features/v2/doctor_feature/practitioner/v1/test_practitioner.py @@ -24,7 +24,6 @@ def test_practitioner(spark_session: SparkSession) -> None: input_file = FileInput() - logger = get_logger(__name__) logger = get_logger(__name__) SparkPipelineFrameworkTestRunnerV2( spark_session=spark_session, diff --git a/tests/library/features/v2/response_text/input_schema/bwellproviderfeed_08122020.json b/tests/library/features/v2/response_text/input_schema/bwellproviderfeed_08122020.json new file mode 100644 index 0000000..a7ccacd --- /dev/null +++ b/tests/library/features/v2/response_text/input_schema/bwellproviderfeed_08122020.json @@ -0,0 +1,180 @@ +{ + "type": "struct", + "fields": [ + { + "name": "provider_last_name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "provider_first_name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "provider_middle_name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "provider_gender", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "provider_title", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "provider_npi", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "gecb_provider_number", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "age_group_seen", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "primary_care_physician", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "speciality_care_provider", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "preferred_provider", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "accept_new_patients", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "accept_medstar_select_insurance", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "telehealth_provider", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "alias", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "facility", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_location_name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_location_address1", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_location_address2", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_location_city", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_location_state", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_location_zip", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_location_mnem", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_location_number", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_department_name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_department_mnem", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "scheduling_department_number", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "practice_name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "customer_friendly_practice_name", + "type": "string", + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file diff --git a/tests/library/features/v2/response_text/input_schema/diagnosis.json b/tests/library/features/v2/response_text/input_schema/diagnosis.json new file mode 100644 index 0000000..238dfad --- /dev/null +++ b/tests/library/features/v2/response_text/input_schema/diagnosis.json @@ -0,0 +1,24 @@ +{ + "type": "struct", + "fields": [ + { + "name": "diagnosis_id", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "member_id", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "icd_code", + "type": "string", + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file diff --git a/tests/library/features/v2/response_text/input_schema/members.json b/tests/library/features/v2/response_text/input_schema/members.json new file mode 100644 index 0000000..e0a52e7 --- /dev/null +++ b/tests/library/features/v2/response_text/input_schema/members.json @@ -0,0 +1,24 @@ +{ + "type": "struct", + "fields": [ + { + "name": "member_id", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "birth_date", + "type": "string", + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file diff --git a/tests/library/features/v2/response_text/input_schema/output.json b/tests/library/features/v2/response_text/input_schema/output.json new file mode 100644 index 0000000..bbc68fe --- /dev/null +++ b/tests/library/features/v2/response_text/input_schema/output.json @@ -0,0 +1,162 @@ +{ + "type": "struct", + "fields": [ + { + "name": "_id", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "about", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "address", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "age", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "balance", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "company", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "email", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "eyeColor", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "favoriteFruit", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "friends", + "type": { + "type": "array", + "elementType": { + "type": "struct", + "fields": [ + { + "name": "id", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "name", + "type": "string", + "nullable": true, + "metadata": {} + } + ] + }, + "containsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "gender", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "greeting", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "guid", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "index", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "isActive", + "type": "boolean", + "nullable": true, + "metadata": {} + }, + { + "name": "latitude", + "type": "double", + "nullable": true, + "metadata": {} + }, + { + "name": "longitude", + "type": "double", + "nullable": true, + "metadata": {} + }, + { + "name": "name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "phone", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "picture", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "registered", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "tags", + "type": { + "type": "array", + "elementType": "string", + "containsNull": true + }, + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file diff --git a/tests/library/features/v2/response_text/input_schema/patient.json b/tests/library/features/v2/response_text/input_schema/patient.json new file mode 100644 index 0000000..506b152 --- /dev/null +++ b/tests/library/features/v2/response_text/input_schema/patient.json @@ -0,0 +1,30 @@ +{ + "type": "struct", + "fields": [ + { + "name": "member_id", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "birth_date", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "null_column", + "type": "string", + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file diff --git a/tests/library/features/v2/response_text/input_schema/simple.json b/tests/library/features/v2/response_text/input_schema/simple.json new file mode 100644 index 0000000..1527463 --- /dev/null +++ b/tests/library/features/v2/response_text/input_schema/simple.json @@ -0,0 +1,12 @@ +{ + "type": "struct", + "fields": [ + { + "name": "simple_field", + "type": "long", + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file diff --git a/tests/library/features/v2/response_text/input_schema/test.json b/tests/library/features/v2/response_text/input_schema/test.json new file mode 100644 index 0000000..bbc68fe --- /dev/null +++ b/tests/library/features/v2/response_text/input_schema/test.json @@ -0,0 +1,162 @@ +{ + "type": "struct", + "fields": [ + { + "name": "_id", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "about", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "address", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "age", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "balance", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "company", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "email", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "eyeColor", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "favoriteFruit", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "friends", + "type": { + "type": "array", + "elementType": { + "type": "struct", + "fields": [ + { + "name": "id", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "name", + "type": "string", + "nullable": true, + "metadata": {} + } + ] + }, + "containsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "gender", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "greeting", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "guid", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "index", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "isActive", + "type": "boolean", + "nullable": true, + "metadata": {} + }, + { + "name": "latitude", + "type": "double", + "nullable": true, + "metadata": {} + }, + { + "name": "longitude", + "type": "double", + "nullable": true, + "metadata": {} + }, + { + "name": "name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "phone", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "picture", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "registered", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "tags", + "type": { + "type": "array", + "elementType": "string", + "containsNull": true + }, + "nullable": true, + "metadata": {} + } + ], + "$schema": "https://raw.githubusercontent.com/imranq2/SparkPipelineFramework.Testing/main/spark_json_schema.json " +} \ No newline at end of file