From cd5bec181f78fcc1e026c90f0833cee2a4dae381 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 3 Dec 2024 11:45:35 -0500 Subject: [PATCH 1/4] rewrite tests to use a new dmrparser_factory --- virtualizarr/tests/test_readers/test_dmrpp.py | 97 +++++++++++++------ 1 file changed, 68 insertions(+), 29 deletions(-) diff --git a/virtualizarr/tests/test_readers/test_dmrpp.py b/virtualizarr/tests/test_readers/test_dmrpp.py index cbafc40f..293ad9d3 100644 --- a/virtualizarr/tests/test_readers/test_dmrpp.py +++ b/virtualizarr/tests/test_readers/test_dmrpp.py @@ -1,5 +1,6 @@ import textwrap from pathlib import Path +from typing import Callable, Generator from xml.etree import ElementTree as ET import numpy as np @@ -21,9 +22,8 @@ ] -@pytest.fixture -def basic_dmrpp() -> DMRParser: - xml_str = """\ +BASIC_DMRPP_XML_STR = textwrap.dedent( + """\ @@ -111,12 +111,11 @@ def basic_dmrpp() -> DMRParser: """ - return DMRParser(root=ET.fromstring(textwrap.dedent(xml_str))) +) -@pytest.fixture -def nested_groups_dmrpp() -> DMRParser: - xml_str = """\ +NESTED_GROUPS_DMRPP_XML_STR = textwrap.dedent( + """\ @@ -166,7 +165,25 @@ def nested_groups_dmrpp() -> DMRParser: """ - return DMRParser(root=ET.fromstring(textwrap.dedent(xml_str))) +) + + +@pytest.fixture +def dmrparser_factory( + tmp_path: Path, +) -> Generator[ + Callable[[str], DMRParser], + None, + None, +]: + def _dmrparser(dmrpp_xml_str: str) -> DMRParser: + # TODO should we actually create then read a dmrpp file in this temporary directory? + # seems a bit pointless if all we're returning is the DMRParser object + + # TODO do we need to adjust the data_filepath, e.g. to end with .nc? + return DMRParser(root=ET.fromstring(dmrpp_xml_str), data_filepath=tmp_path) + + yield _dmrparser @network @@ -179,36 +196,38 @@ def test_NASA_dmrpp(data_url, dmrpp_url): @pytest.mark.parametrize( - "dmrpp_fixture, fqn_path, expected_xpath", + "dmrpp_xml_str, fqn_path, expected_xpath", [ - ("basic_dmrpp", "/", "."), - ("basic_dmrpp", "/data", "./*[@name='data']"), - ("basic_dmrpp", "/data/items", "./*[@name='data']/*[@name='items']"), + (BASIC_DMRPP_XML_STR, "/", "."), + (BASIC_DMRPP_XML_STR, "/data", "./*[@name='data']"), + (BASIC_DMRPP_XML_STR, "/data/items", "./*[@name='data']/*[@name='items']"), ( - "nested_groups_dmrpp", + NESTED_GROUPS_DMRPP_XML_STR, "/group1/group2/area", "./*[@name='group1']/*[@name='group2']/*[@name='area']", ), ], ) -def test_find_node_fqn(request, dmrpp_fixture, fqn_path, expected_xpath): - parser_instance = request.getfixturevalue(dmrpp_fixture) +def test_find_node_fqn(dmrparser_factory, dmrpp_xml_str, fqn_path, expected_xpath): + parser_instance = dmrparser_factory(dmrpp_xml_str) result = parser_instance.find_node_fqn(fqn_path) expected = parser_instance.root.find(expected_xpath, parser_instance._NS) assert result == expected +# TODO change how this is parametrized to only use a string name not the entire DMRPP XML string @pytest.mark.parametrize( - "dmrpp_fixture, group_path", + "dmrpp_xml_str, group_path", [ - ("basic_dmrpp", "/"), - ("nested_groups_dmrpp", "/"), - ("nested_groups_dmrpp", "/group1"), - ("nested_groups_dmrpp", "/group1/group2"), + (BASIC_DMRPP_XML_STR, "/"), + (NESTED_GROUPS_DMRPP_XML_STR, "/"), + (NESTED_GROUPS_DMRPP_XML_STR, "/group1"), + (NESTED_GROUPS_DMRPP_XML_STR, "/group1/group2"), ], ) -def test_split_groups(request, dmrpp_fixture, group_path): - dmrpp_instance = request.getfixturevalue(dmrpp_fixture) +def test_split_groups(dmrparser_factory, dmrpp_xml_str, group_path): + dmrpp_instance = dmrparser_factory(dmrpp_xml_str) + # get all tags in a dataset (so all tags excluding nested groups) dataset_tags = lambda x: [ d for d in x if d.tag != "{" + dmrpp_instance._NS["dap"] + "}" + "Group" @@ -221,21 +240,28 @@ def test_split_groups(request, dmrpp_fixture, group_path): assert result_tags == expected_tags -def test_parse_dataset(basic_dmrpp, nested_groups_dmrpp): +def test_parse_dataset(dmrparser_factory): + basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + vds = basic_dmrpp.parse_dataset() assert vds.sizes == {"x": 720, "y": 1440, "z": 3} assert vds.data_vars.keys() == {"data", "mask"} assert vds.data_vars["data"].dims == ("x", "y") assert vds.attrs == {"Conventions": "CF-1.6", "title": "Sample Dataset"} assert vds.coords.keys() == {"x", "y", "z"} + + nested_groups_dmrpp = dmrparser_factory(NESTED_GROUPS_DMRPP_XML_STR) + vds_root_implicit = nested_groups_dmrpp.parse_dataset() vds_root = nested_groups_dmrpp.parse_dataset(group="/") xrt.assert_identical(vds_root_implicit, vds_root) assert vds_root.sizes == {"a": 10, "b": 10} assert vds_root.coords.keys() == {"a", "b"} + vds_g1 = nested_groups_dmrpp.parse_dataset(group="/group1") assert vds_g1.sizes == {"x": 720, "y": 1440} assert vds_g1.coords.keys() == {"x", "y"} + vds_g2 = nested_groups_dmrpp.parse_dataset(group="/group1/group2") assert vds_g2.sizes == {"x": 720, "y": 1440} assert vds_g2.data_vars.keys() == {"area"} @@ -249,13 +275,17 @@ def test_parse_dataset(basic_dmrpp, nested_groups_dmrpp): ("/group1/x", {"x": 720}), ], ) -def test_parse_dim(nested_groups_dmrpp, dim_path, expected): +def test_parse_dim(dmrparser_factory, dim_path, expected): + nested_groups_dmrpp = dmrparser_factory(NESTED_GROUPS_DMRPP_XML_STR) + result = nested_groups_dmrpp._parse_dim(nested_groups_dmrpp.find_node_fqn(dim_path)) assert result == expected @pytest.mark.parametrize("dim_path", ["/", "/mask"]) -def test_find_dimension_tags(basic_dmrpp, dim_path): +def test_find_dimension_tags(dmrparser_factory, dim_path): + basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + # Check that Dimension tags match Dimension tags from the root # Check that Dim tags reference the same Dimension tags from the root assert basic_dmrpp._find_dimension_tags( @@ -263,7 +293,9 @@ def test_find_dimension_tags(basic_dmrpp, dim_path): ) == basic_dmrpp.root.findall("dap:Dimension", basic_dmrpp._NS) -def test_parse_variable(basic_dmrpp): +def test_parse_variable(dmrparser_factory): + basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + var = basic_dmrpp._parse_variable(basic_dmrpp.find_node_fqn("/data")) assert var.dtype == "float32" assert var.dims == ("x", "y") @@ -288,7 +320,9 @@ def test_parse_variable(basic_dmrpp): ("data/_FillValue", {"_FillValue": -32768}), ], ) -def test_parse_attribute(basic_dmrpp, attr_path, expected): +def test_parse_attribute(dmrparser_factory, attr_path, expected): + basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + result = basic_dmrpp._parse_attribute(basic_dmrpp.find_node_fqn(attr_path)) assert result == expected @@ -311,7 +345,9 @@ def test_parse_attribute(basic_dmrpp, attr_path, expected): ), ], ) -def test_parse_filters(basic_dmrpp, var_path, dtype, expected_filters): +def test_parse_filters(dmrparser_factory, var_path, dtype, expected_filters): + basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + chunks_tag = basic_dmrpp.find_node_fqn(var_path).find( "dmrpp:chunks", basic_dmrpp._NS ) @@ -319,6 +355,7 @@ def test_parse_filters(basic_dmrpp, var_path, dtype, expected_filters): assert result == expected_filters +@pytest.mark.xfail(reason="probably failing because of hardcoded data file path") @pytest.mark.parametrize( "var_path, chunk_shape, expected_lengths, expected_offsets, expected_paths", [ @@ -339,13 +376,15 @@ def test_parse_filters(basic_dmrpp, var_path, dtype, expected_filters): ], ) def test_parse_chunks( - basic_dmrpp, + dmrparser_factory, var_path, chunk_shape, expected_lengths, expected_offsets, expected_paths, ): + basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + chunks_tag = basic_dmrpp.find_node_fqn(var_path).find( "dmrpp:chunks", basic_dmrpp._NS ) From e521e20e16db355ec22ed2a857d7c530fdd464d3 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 3 Dec 2024 11:53:32 -0500 Subject: [PATCH 2/4] rewrite using global dict of XML strings --- virtualizarr/tests/test_readers/test_dmrpp.py | 300 +++++++++--------- 1 file changed, 149 insertions(+), 151 deletions(-) diff --git a/virtualizarr/tests/test_readers/test_dmrpp.py b/virtualizarr/tests/test_readers/test_dmrpp.py index 293ad9d3..d1875337 100644 --- a/virtualizarr/tests/test_readers/test_dmrpp.py +++ b/virtualizarr/tests/test_readers/test_dmrpp.py @@ -21,151 +21,150 @@ # TODO: later add MUR, SWOT, TEMPO and others by using kerchunk JSON to read refs (rather than reading the whole netcdf file) ] - -BASIC_DMRPP_XML_STR = textwrap.dedent( - """\ - - - - - - - - - grid x-axis - - - - - - - - - grid y-axis - - - - - - - - - grid z-axis - - - - - - - - - - analysed sea surface temperature - - - 1 - 2 - 3 - - - -32768 - - - 298.14999999999998 - - - 0.001 - - - x y z - - - 360 720 - - - - - - - - - - - - - - - - - mask - - - - - - - CF-1.6 - - - Sample Dataset - - - """ -) - - -NESTED_GROUPS_DMRPP_XML_STR = textwrap.dedent( - """\ - - - - - - - - - - - - - - - - - +DMRPP_XML_STRINGS = { + "basic": textwrap.dedent( + """\ + + + - - - test + + + grid x-axis - - - test + + + grid y-axis - + - - - - + + + + grid z-axis + + + + + + + + + + analysed sea surface temperature + + + 1 + 2 + 3 + + + -32768 + + + 298.14999999999998 + + + 0.001 + + + x y z + + + 360 720 + + + + + + + + + + + + + + + + + mask + + + + + + + CF-1.6 + + + Sample Dataset + + + """ + ), + "nested_groups": textwrap.dedent( + """\ + + + + + + - - - - """ -) + + + + + + + + + + + + + test + + + + + + + + + test + + + + + + + + + + + + + + + + + """ + ), +} @pytest.fixture @@ -196,37 +195,36 @@ def test_NASA_dmrpp(data_url, dmrpp_url): @pytest.mark.parametrize( - "dmrpp_xml_str, fqn_path, expected_xpath", + "dmrpp_xml_str_key, fqn_path, expected_xpath", [ - (BASIC_DMRPP_XML_STR, "/", "."), - (BASIC_DMRPP_XML_STR, "/data", "./*[@name='data']"), - (BASIC_DMRPP_XML_STR, "/data/items", "./*[@name='data']/*[@name='items']"), + ("basic", "/", "."), + ("basic", "/data", "./*[@name='data']"), + ("basic", "/data/items", "./*[@name='data']/*[@name='items']"), ( - NESTED_GROUPS_DMRPP_XML_STR, + "nested_groups", "/group1/group2/area", "./*[@name='group1']/*[@name='group2']/*[@name='area']", ), ], ) -def test_find_node_fqn(dmrparser_factory, dmrpp_xml_str, fqn_path, expected_xpath): - parser_instance = dmrparser_factory(dmrpp_xml_str) +def test_find_node_fqn(dmrparser_factory, dmrpp_xml_str_key, fqn_path, expected_xpath): + parser_instance = dmrparser_factory(DMRPP_XML_STRINGS[dmrpp_xml_str_key]) result = parser_instance.find_node_fqn(fqn_path) expected = parser_instance.root.find(expected_xpath, parser_instance._NS) assert result == expected -# TODO change how this is parametrized to only use a string name not the entire DMRPP XML string @pytest.mark.parametrize( - "dmrpp_xml_str, group_path", + "dmrpp_xml_str_key, group_path", [ - (BASIC_DMRPP_XML_STR, "/"), - (NESTED_GROUPS_DMRPP_XML_STR, "/"), - (NESTED_GROUPS_DMRPP_XML_STR, "/group1"), - (NESTED_GROUPS_DMRPP_XML_STR, "/group1/group2"), + ("basic", "/"), + ("nested_groups", "/"), + ("nested_groups", "/group1"), + ("nested_groups", "/group1/group2"), ], ) -def test_split_groups(dmrparser_factory, dmrpp_xml_str, group_path): - dmrpp_instance = dmrparser_factory(dmrpp_xml_str) +def test_split_groups(dmrparser_factory, dmrpp_xml_str_key, group_path): + dmrpp_instance = dmrparser_factory(DMRPP_XML_STRINGS[dmrpp_xml_str_key]) # get all tags in a dataset (so all tags excluding nested groups) dataset_tags = lambda x: [ @@ -241,7 +239,7 @@ def test_split_groups(dmrparser_factory, dmrpp_xml_str, group_path): def test_parse_dataset(dmrparser_factory): - basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) vds = basic_dmrpp.parse_dataset() assert vds.sizes == {"x": 720, "y": 1440, "z": 3} @@ -250,7 +248,7 @@ def test_parse_dataset(dmrparser_factory): assert vds.attrs == {"Conventions": "CF-1.6", "title": "Sample Dataset"} assert vds.coords.keys() == {"x", "y", "z"} - nested_groups_dmrpp = dmrparser_factory(NESTED_GROUPS_DMRPP_XML_STR) + nested_groups_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["nested_groups"]) vds_root_implicit = nested_groups_dmrpp.parse_dataset() vds_root = nested_groups_dmrpp.parse_dataset(group="/") @@ -276,7 +274,7 @@ def test_parse_dataset(dmrparser_factory): ], ) def test_parse_dim(dmrparser_factory, dim_path, expected): - nested_groups_dmrpp = dmrparser_factory(NESTED_GROUPS_DMRPP_XML_STR) + nested_groups_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["nested_groups"]) result = nested_groups_dmrpp._parse_dim(nested_groups_dmrpp.find_node_fqn(dim_path)) assert result == expected @@ -284,7 +282,7 @@ def test_parse_dim(dmrparser_factory, dim_path, expected): @pytest.mark.parametrize("dim_path", ["/", "/mask"]) def test_find_dimension_tags(dmrparser_factory, dim_path): - basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) # Check that Dimension tags match Dimension tags from the root # Check that Dim tags reference the same Dimension tags from the root @@ -294,7 +292,7 @@ def test_find_dimension_tags(dmrparser_factory, dim_path): def test_parse_variable(dmrparser_factory): - basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) var = basic_dmrpp._parse_variable(basic_dmrpp.find_node_fqn("/data")) assert var.dtype == "float32" @@ -321,7 +319,7 @@ def test_parse_variable(dmrparser_factory): ], ) def test_parse_attribute(dmrparser_factory, attr_path, expected): - basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) result = basic_dmrpp._parse_attribute(basic_dmrpp.find_node_fqn(attr_path)) assert result == expected @@ -346,7 +344,7 @@ def test_parse_attribute(dmrparser_factory, attr_path, expected): ], ) def test_parse_filters(dmrparser_factory, var_path, dtype, expected_filters): - basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) chunks_tag = basic_dmrpp.find_node_fqn(var_path).find( "dmrpp:chunks", basic_dmrpp._NS @@ -383,7 +381,7 @@ def test_parse_chunks( expected_offsets, expected_paths, ): - basic_dmrpp = dmrparser_factory(BASIC_DMRPP_XML_STR) + basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) chunks_tag = basic_dmrpp.find_node_fqn(var_path).find( "dmrpp:chunks", basic_dmrpp._NS From 4ff17d0cf280bc052d65f96e9a4eaba775fbc03b Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 3 Dec 2024 14:02:57 -0500 Subject: [PATCH 3/4] fix final test by explicitly passing in tmp_path instead of using a fixture which requests tmp_path --- virtualizarr/tests/test_readers/test_dmrpp.py | 79 +++++++++---------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/virtualizarr/tests/test_readers/test_dmrpp.py b/virtualizarr/tests/test_readers/test_dmrpp.py index d1875337..5ea85e66 100644 --- a/virtualizarr/tests/test_readers/test_dmrpp.py +++ b/virtualizarr/tests/test_readers/test_dmrpp.py @@ -1,6 +1,5 @@ import textwrap from pathlib import Path -from typing import Callable, Generator from xml.etree import ElementTree as ET import numpy as np @@ -167,22 +166,13 @@ } -@pytest.fixture -def dmrparser_factory( - tmp_path: Path, -) -> Generator[ - Callable[[str], DMRParser], - None, - None, -]: - def _dmrparser(dmrpp_xml_str: str) -> DMRParser: - # TODO should we actually create then read a dmrpp file in this temporary directory? - # seems a bit pointless if all we're returning is the DMRParser object +def dmrparser(dmrpp_xml_str: str, tmp_path: Path, filename="test.nc") -> DMRParser: + # TODO we should actually create a dmrpp file in a temporary directory + # this would avoid the need to pass tmp_path separately - # TODO do we need to adjust the data_filepath, e.g. to end with .nc? - return DMRParser(root=ET.fromstring(dmrpp_xml_str), data_filepath=tmp_path) - - yield _dmrparser + return DMRParser( + root=ET.fromstring(dmrpp_xml_str), data_filepath=tmp_path / filename + ) @network @@ -207,8 +197,8 @@ def test_NASA_dmrpp(data_url, dmrpp_url): ), ], ) -def test_find_node_fqn(dmrparser_factory, dmrpp_xml_str_key, fqn_path, expected_xpath): - parser_instance = dmrparser_factory(DMRPP_XML_STRINGS[dmrpp_xml_str_key]) +def test_find_node_fqn(tmp_path, dmrpp_xml_str_key, fqn_path, expected_xpath): + parser_instance = dmrparser(DMRPP_XML_STRINGS[dmrpp_xml_str_key], tmp_path=tmp_path) result = parser_instance.find_node_fqn(fqn_path) expected = parser_instance.root.find(expected_xpath, parser_instance._NS) assert result == expected @@ -223,8 +213,8 @@ def test_find_node_fqn(dmrparser_factory, dmrpp_xml_str_key, fqn_path, expected_ ("nested_groups", "/group1/group2"), ], ) -def test_split_groups(dmrparser_factory, dmrpp_xml_str_key, group_path): - dmrpp_instance = dmrparser_factory(DMRPP_XML_STRINGS[dmrpp_xml_str_key]) +def test_split_groups(tmp_path, dmrpp_xml_str_key, group_path): + dmrpp_instance = dmrparser(DMRPP_XML_STRINGS[dmrpp_xml_str_key], tmp_path=tmp_path) # get all tags in a dataset (so all tags excluding nested groups) dataset_tags = lambda x: [ @@ -238,8 +228,8 @@ def test_split_groups(dmrparser_factory, dmrpp_xml_str_key, group_path): assert result_tags == expected_tags -def test_parse_dataset(dmrparser_factory): - basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) +def test_parse_dataset(tmp_path): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) vds = basic_dmrpp.parse_dataset() assert vds.sizes == {"x": 720, "y": 1440, "z": 3} @@ -248,7 +238,9 @@ def test_parse_dataset(dmrparser_factory): assert vds.attrs == {"Conventions": "CF-1.6", "title": "Sample Dataset"} assert vds.coords.keys() == {"x", "y", "z"} - nested_groups_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["nested_groups"]) + nested_groups_dmrpp = dmrparser( + DMRPP_XML_STRINGS["nested_groups"], tmp_path=tmp_path + ) vds_root_implicit = nested_groups_dmrpp.parse_dataset() vds_root = nested_groups_dmrpp.parse_dataset(group="/") @@ -273,16 +265,18 @@ def test_parse_dataset(dmrparser_factory): ("/group1/x", {"x": 720}), ], ) -def test_parse_dim(dmrparser_factory, dim_path, expected): - nested_groups_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["nested_groups"]) +def test_parse_dim(tmp_path, dim_path, expected): + nested_groups_dmrpp = dmrparser( + DMRPP_XML_STRINGS["nested_groups"], tmp_path=tmp_path + ) result = nested_groups_dmrpp._parse_dim(nested_groups_dmrpp.find_node_fqn(dim_path)) assert result == expected @pytest.mark.parametrize("dim_path", ["/", "/mask"]) -def test_find_dimension_tags(dmrparser_factory, dim_path): - basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) +def test_find_dimension_tags(tmp_path, dim_path): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) # Check that Dimension tags match Dimension tags from the root # Check that Dim tags reference the same Dimension tags from the root @@ -291,8 +285,8 @@ def test_find_dimension_tags(dmrparser_factory, dim_path): ) == basic_dmrpp.root.findall("dap:Dimension", basic_dmrpp._NS) -def test_parse_variable(dmrparser_factory): - basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) +def test_parse_variable(tmp_path): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) var = basic_dmrpp._parse_variable(basic_dmrpp.find_node_fqn("/data")) assert var.dtype == "float32" @@ -318,8 +312,8 @@ def test_parse_variable(dmrparser_factory): ("data/_FillValue", {"_FillValue": -32768}), ], ) -def test_parse_attribute(dmrparser_factory, attr_path, expected): - basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) +def test_parse_attribute(tmp_path, attr_path, expected): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) result = basic_dmrpp._parse_attribute(basic_dmrpp.find_node_fqn(attr_path)) assert result == expected @@ -343,8 +337,8 @@ def test_parse_attribute(dmrparser_factory, attr_path, expected): ), ], ) -def test_parse_filters(dmrparser_factory, var_path, dtype, expected_filters): - basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) +def test_parse_filters(tmp_path, var_path, dtype, expected_filters): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) chunks_tag = basic_dmrpp.find_node_fqn(var_path).find( "dmrpp:chunks", basic_dmrpp._NS @@ -353,40 +347,45 @@ def test_parse_filters(dmrparser_factory, var_path, dtype, expected_filters): assert result == expected_filters -@pytest.mark.xfail(reason="probably failing because of hardcoded data file path") @pytest.mark.parametrize( - "var_path, chunk_shape, expected_lengths, expected_offsets, expected_paths", + "var_path, chunk_shape, chunk_grid_shape, expected_lengths, expected_offsets", [ ( "/data", (360, 720), + (3, 3), np.full((3, 3), 4083, dtype=np.uint64), (np.arange(9, dtype=np.uint64) * 4083 + 40762).reshape(3, 3), - np.full((3, 3), "test.dmrpp", dtype=np.dtypes.StringDType), ), ( "/mask", (720, 1440), + (1,), np.array([4], dtype=np.uint64), np.array([41276], dtype=np.uint64), - np.array(["test.dmrpp"], dtype=np.dtypes.StringDType), ), ], ) def test_parse_chunks( - dmrparser_factory, + tmp_path, var_path, chunk_shape, + chunk_grid_shape, expected_lengths, expected_offsets, - expected_paths, ): - basic_dmrpp = dmrparser_factory(DMRPP_XML_STRINGS["basic"]) + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) chunks_tag = basic_dmrpp.find_node_fqn(var_path).find( "dmrpp:chunks", basic_dmrpp._NS ) result = basic_dmrpp._parse_chunks(chunks_tag, chunk_shape) + + expected_paths = np.full( + shape=chunk_grid_shape, + fill_value=str(tmp_path / "test.nc"), + dtype=np.dtypes.StringDType, + ) expected = ChunkManifest.from_arrays( lengths=expected_lengths, offsets=expected_offsets, paths=expected_paths ) From bf0bb87b4cf352eb9d44f6cbc3fab5741bb2c699 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 3 Dec 2024 14:23:41 -0500 Subject: [PATCH 4/4] fix bug with not converting Path objects to strings --- virtualizarr/tests/test_readers/test_dmrpp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/tests/test_readers/test_dmrpp.py b/virtualizarr/tests/test_readers/test_dmrpp.py index 5ea85e66..d6238ca5 100644 --- a/virtualizarr/tests/test_readers/test_dmrpp.py +++ b/virtualizarr/tests/test_readers/test_dmrpp.py @@ -171,7 +171,7 @@ def dmrparser(dmrpp_xml_str: str, tmp_path: Path, filename="test.nc") -> DMRPars # this would avoid the need to pass tmp_path separately return DMRParser( - root=ET.fromstring(dmrpp_xml_str), data_filepath=tmp_path / filename + root=ET.fromstring(dmrpp_xml_str), data_filepath=str(tmp_path / filename) )