From e951ec3afeea097a712c468ab50873e181d246f9 Mon Sep 17 00:00:00 2001 From: Santiago Figueroa Manrique Date: Tue, 24 Sep 2024 11:03:03 +0200 Subject: [PATCH 1/6] added columnar data validation Signed-off-by: Santiago Figueroa Manrique --- src/power_grid_model/_utils.py | 14 +++++--------- src/power_grid_model/validation/validation.py | 17 ++++++++++++----- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/power_grid_model/_utils.py b/src/power_grid_model/_utils.py index a2d1b52ef..37aac44d0 100644 --- a/src/power_grid_model/_utils.py +++ b/src/power_grid_model/_utils.py @@ -307,16 +307,12 @@ def compatibility_convert_row_columnar_dataset( dataset_type: DatasetType, available_components: list[ComponentType] | None = None, ) -> Dataset: - """Temporary function to copy row based dataset to a column based dataset as per the data_filter. - The purpose of this function is to mimic columnar data without any memory footprint benefits. - Note: If both the input and requested output are row based, the same dataset is returned without a copy. + """Temporary function to transform row, column or mixed based datasets to a full row or column based dataset as per + the data_filter. The purpose of this function is to mimic columnar data and transform back to row data without any + memory footprint benefits. + Note: Copies are made in a per-component basis; if a component is row based in both the input and the requested + output, that componened is returned without a copy. 
- Args: - data (Dataset): - component_types (_ComponentAttributeMappingDict): - - Returns: - Dataset: converted dataset Args: data (Dataset): dataset to convert data_filter (ComponentAttributeMapping): desired component and attribute mapping diff --git a/src/power_grid_model/validation/validation.py b/src/power_grid_model/validation/validation.py index 7ac8b19a6..24a915dfa 100644 --- a/src/power_grid_model/validation/validation.py +++ b/src/power_grid_model/validation/validation.py @@ -16,7 +16,7 @@ import numpy as np from power_grid_model import ComponentType, DatasetType, power_grid_meta_data -from power_grid_model._utils import convert_batch_dataset_to_batch_list +from power_grid_model._utils import compatibility_convert_row_columnar_dataset, convert_batch_dataset_to_batch_list from power_grid_model.data_types import BatchDataset, Dataset, SingleDataset from power_grid_model.enum import ( Branch3Side, @@ -83,8 +83,12 @@ def validate_input_data( Raises: Error: KeyError | TypeError | ValueError: if the data structure is invalid. 
""" + # Convert to row based if in columnar or mixed format + row_input_data = compatibility_convert_row_columnar_dataset(input_data, None, DatasetType.input) + # A deep copy is made of the input data, since default values will be added in the validation process - input_data_copy = copy.deepcopy(input_data) + input_data_copy = copy.deepcopy(row_input_data) + assert_valid_data_structure(input_data_copy, DatasetType.input) errors: list[ValidationError] = [] @@ -136,12 +140,15 @@ def validate_batch_data( errors = {} for batch, batch_update_data in enumerate(batch_data): - assert_valid_data_structure(batch_update_data, DatasetType.update) - id_errors: list[ValidationError] = list(validate_ids_exist(batch_update_data, input_data)) + # Convert to row based if in columnar format + row_batch_update_data = compatibility_convert_row_columnar_dataset(batch_update_data, None, DatasetType.update) + + assert_valid_data_structure(row_batch_update_data, DatasetType.update) + id_errors: list[ValidationError] = list(validate_ids_exist(row_batch_update_data, input_data)) batch_errors = input_errors + id_errors if not id_errors: - merged_data = update_input_data(input_data, batch_update_data) + merged_data = update_input_data(input_data, row_batch_update_data) batch_errors += validate_required_values(merged_data, calculation_type, symmetric) batch_errors += validate_values(merged_data, calculation_type) From 9f92e6c6638ab29bbd703b841c3d7e856307c4fb Mon Sep 17 00:00:00 2001 From: Santiago Figueroa Manrique Date: Tue, 24 Sep 2024 12:50:32 +0200 Subject: [PATCH 2/6] input validation tests Signed-off-by: Santiago Figueroa Manrique --- tests/unit/validation/test_input_validation.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/unit/validation/test_input_validation.py b/tests/unit/validation/test_input_validation.py index 4e610fc7b..36aa038f8 100644 --- a/tests/unit/validation/test_input_validation.py +++ 
b/tests/unit/validation/test_input_validation.py @@ -16,6 +16,7 @@ WindingType, initialize_array, ) +from power_grid_model._utils import compatibility_convert_row_columnar_dataset from power_grid_model.enum import CalculationType, FaultPhase, FaultType from power_grid_model.validation import validate_input_data from power_grid_model.validation.errors import ( @@ -39,7 +40,7 @@ @pytest.fixture -def input_data() -> dict[ComponentType, np.ndarray]: +def original_data() -> dict[ComponentType, np.ndarray]: node = initialize_array(DatasetType.input, ComponentType.node, 4) node["id"] = [0, 2, 1, 2] node["u_rated"] = [10.5e3, 10.5e3, 0, 10.5e3] @@ -274,6 +275,16 @@ def input_data() -> dict[ComponentType, np.ndarray]: return data +@pytest.fixture +def original_data_columnar(original_data): + return compatibility_convert_row_columnar_dataset(original_data, ..., DatasetType.input) + + +@pytest.fixture(params=["original_data", "original_data_columnar"]) +def input_data(request): + return request.getfixturevalue(request.param) + + def test_validate_input_data_sym_calculation(input_data): validation_errors = validate_input_data(input_data, symmetric=True) From b90bd67c9cb758ccc271bbb8e6d4120e837f86af Mon Sep 17 00:00:00 2001 From: Santiago Figueroa Manrique Date: Wed, 25 Sep 2024 08:20:48 +0200 Subject: [PATCH 3/6] batch validation tests - failing Signed-off-by: Santiago Figueroa Manrique --- tests/unit/validation/test_batch_validation.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/unit/validation/test_batch_validation.py b/tests/unit/validation/test_batch_validation.py index 9047e2e61..b035d33c2 100644 --- a/tests/unit/validation/test_batch_validation.py +++ b/tests/unit/validation/test_batch_validation.py @@ -6,7 +6,8 @@ import numpy as np import pytest -from power_grid_model import LoadGenType, initialize_array +from power_grid_model._utils import compatibility_convert_row_columnar_dataset +from power_grid_model import DatasetType, 
LoadGenType, initialize_array from power_grid_model.validation import validate_batch_data from power_grid_model.validation.errors import MultiComponentNotUniqueError, NotBooleanError @@ -41,7 +42,7 @@ def input_data() -> dict[str, np.ndarray]: @pytest.fixture -def batch_data() -> dict[str, np.ndarray]: +def original_batch_data() -> dict[str, np.ndarray]: line = initialize_array("update", "line", (3, 2)) line["id"] = [[5, 6], [6, 7], [7, 5]] line["from_status"] = [[1, 1], [1, 1], [1, 1]] @@ -53,6 +54,16 @@ def batch_data() -> dict[str, np.ndarray]: return {"line": line, "asym_load": asym_load} +@pytest.fixture +def original_batch_data_columnar(original_batch_data): + return compatibility_convert_row_columnar_dataset(original_batch_data, ..., DatasetType.update) + + +@pytest.fixture(params=["original_batch_data", "original_batch_data_columnar"]) +def batch_data(request): + return request.getfixturevalue(request.param) + + def test_validate_batch_data(input_data, batch_data): errors = validate_batch_data(input_data, batch_data) assert not errors From 9ff63264c3ef9bae33d3c59ae95f4d1ec1942ed0 Mon Sep 17 00:00:00 2001 From: Santiago Figueroa Manrique Date: Wed, 25 Sep 2024 08:32:29 +0200 Subject: [PATCH 4/6] batch validation tests pass - temporary Signed-off-by: Santiago Figueroa Manrique --- src/power_grid_model/validation/validation.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/power_grid_model/validation/validation.py b/src/power_grid_model/validation/validation.py index 24a915dfa..850ef07e5 100644 --- a/src/power_grid_model/validation/validation.py +++ b/src/power_grid_model/validation/validation.py @@ -135,20 +135,21 @@ def validate_batch_data( input_errors: list[ValidationError] = list(validate_unique_ids_across_components(input_data)) + # Convert to row based if in columnar format + # TODO(figueroa1395): transform to columnar per single batch once the columnar dataset python extension is finished + row_update_data = 
compatibility_convert_row_columnar_dataset(update_data, None, DatasetType.update) + # Splitting update_data_into_batches may raise TypeErrors and ValueErrors - batch_data = convert_batch_dataset_to_batch_list(update_data) + batch_data = convert_batch_dataset_to_batch_list(row_update_data) errors = {} for batch, batch_update_data in enumerate(batch_data): - # Convert to row based if in columnar format - row_batch_update_data = compatibility_convert_row_columnar_dataset(batch_update_data, None, DatasetType.update) - - assert_valid_data_structure(row_batch_update_data, DatasetType.update) - id_errors: list[ValidationError] = list(validate_ids_exist(row_batch_update_data, input_data)) + assert_valid_data_structure(batch_update_data, DatasetType.update) + id_errors: list[ValidationError] = list(validate_ids_exist(batch_update_data, input_data)) batch_errors = input_errors + id_errors if not id_errors: - merged_data = update_input_data(input_data, row_batch_update_data) + merged_data = update_input_data(input_data, batch_update_data) batch_errors += validate_required_values(merged_data, calculation_type, symmetric) batch_errors += validate_values(merged_data, calculation_type) From 291b134fe9acd610cd3e7deb44df5d8631a35f35 Mon Sep 17 00:00:00 2001 From: Santiago Figueroa Manrique Date: Wed, 25 Sep 2024 08:52:33 +0200 Subject: [PATCH 5/6] Formatting Signed-off-by: Santiago Figueroa Manrique --- tests/unit/validation/test_batch_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/validation/test_batch_validation.py b/tests/unit/validation/test_batch_validation.py index b035d33c2..5636914fa 100644 --- a/tests/unit/validation/test_batch_validation.py +++ b/tests/unit/validation/test_batch_validation.py @@ -6,8 +6,8 @@ import numpy as np import pytest -from power_grid_model._utils import compatibility_convert_row_columnar_dataset from power_grid_model import DatasetType, LoadGenType, initialize_array +from power_grid_model._utils import 
compatibility_convert_row_columnar_dataset from power_grid_model.validation import validate_batch_data from power_grid_model.validation.errors import MultiComponentNotUniqueError, NotBooleanError From 48434b752f1d84fcb3578e473b95b5ff66d1cf7f Mon Sep 17 00:00:00 2001 From: Santiago Figueroa Manrique Date: Wed, 25 Sep 2024 09:21:46 +0200 Subject: [PATCH 6/6] Addressed comments Signed-off-by: Santiago Figueroa Manrique --- src/power_grid_model/_utils.py | 2 +- src/power_grid_model/validation/validation.py | 3 ++- tests/unit/validation/test_batch_validation.py | 2 +- tests/unit/validation/test_input_validation.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/power_grid_model/_utils.py b/src/power_grid_model/_utils.py index 37aac44d0..764e37c40 100644 --- a/src/power_grid_model/_utils.py +++ b/src/power_grid_model/_utils.py @@ -310,7 +310,7 @@ def compatibility_convert_row_columnar_dataset( """Temporary function to transform row, column or mixed based datasets to a full row or column based dataset as per the data_filter. The purpose of this function is to mimic columnar data and transform back to row data without any memory footprint benefits. - Note: Copies are made in a per-component basis; if a component is row based in both the input and the requested + Note: Copies are made on a per-component basis; if a component is row based in both the input and the requested output, that componened is returned without a copy. 
Args: diff --git a/src/power_grid_model/validation/validation.py b/src/power_grid_model/validation/validation.py index 850ef07e5..d05afe667 100644 --- a/src/power_grid_model/validation/validation.py +++ b/src/power_grid_model/validation/validation.py @@ -136,7 +136,8 @@ def validate_batch_data( input_errors: list[ValidationError] = list(validate_unique_ids_across_components(input_data)) # Convert to row based if in columnar format - # TODO(figueroa1395): transform to columnar per single batch once the columnar dataset python extension is finished + # TODO(figueroa1395): transform to columnar per single batch scenario once the columnar dataset python extension + # is finished row_update_data = compatibility_convert_row_columnar_dataset(update_data, None, DatasetType.update) # Splitting update_data_into_batches may raise TypeErrors and ValueErrors diff --git a/tests/unit/validation/test_batch_validation.py b/tests/unit/validation/test_batch_validation.py index 5636914fa..ca9e55a1d 100644 --- a/tests/unit/validation/test_batch_validation.py +++ b/tests/unit/validation/test_batch_validation.py @@ -56,7 +56,7 @@ def original_batch_data() -> dict[str, np.ndarray]: @pytest.fixture def original_batch_data_columnar(original_batch_data): - return compatibility_convert_row_columnar_dataset(original_batch_data, ..., DatasetType.update) + return compatibility_convert_row_columnar_dataset(original_batch_data, Ellipsis, DatasetType.update) @pytest.fixture(params=["original_batch_data", "original_batch_data_columnar"]) diff --git a/tests/unit/validation/test_input_validation.py b/tests/unit/validation/test_input_validation.py index 36aa038f8..5f4e041b1 100644 --- a/tests/unit/validation/test_input_validation.py +++ b/tests/unit/validation/test_input_validation.py @@ -277,7 +277,7 @@ def original_data() -> dict[ComponentType, np.ndarray]: @pytest.fixture def original_data_columnar(original_data): - return compatibility_convert_row_columnar_dataset(original_data, ..., 
DatasetType.input) + return compatibility_convert_row_columnar_dataset(original_data, Ellipsis, DatasetType.input) @pytest.fixture(params=["original_data", "original_data_columnar"])