Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/Update validation for columnar #734

Merged
merged 6 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 5 additions & 9 deletions src/power_grid_model/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,16 +307,12 @@ def compatibility_convert_row_columnar_dataset(
dataset_type: DatasetType,
available_components: list[ComponentType] | None = None,
) -> Dataset:
"""Temporary function to copy row based dataset to a column based dataset as per the data_filter.
The purpose of this function is to mimic columnar data without any memory footprint benefits.
Note: If both the input and requested output are row based, the same dataset is returned without a copy.
"""Temporary function to transform row, column or mixed based datasets to a full row or column based dataset as per
the data_filter. The purpose of this function is to mimic columnar data and transform back to row data without any
memory footprint benefits.
Note: Copies are made on a per-component basis; if a component is row based in both the input and the requested
output, that component is returned without a copy.

Args:
data (Dataset):
component_types (_ComponentAttributeMappingDict):

Returns:
Dataset: converted dataset
Args:
data (Dataset): dataset to convert
data_filter (ComponentAttributeMapping): desired component and attribute mapping
Expand Down
15 changes: 12 additions & 3 deletions src/power_grid_model/validation/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import numpy as np

from power_grid_model import ComponentType, DatasetType, power_grid_meta_data
from power_grid_model._utils import convert_batch_dataset_to_batch_list
from power_grid_model._utils import compatibility_convert_row_columnar_dataset, convert_batch_dataset_to_batch_list
from power_grid_model.data_types import BatchDataset, Dataset, SingleDataset
from power_grid_model.enum import (
Branch3Side,
Expand Down Expand Up @@ -83,8 +83,12 @@ def validate_input_data(
Raises:
Error: KeyError | TypeError | ValueError: if the data structure is invalid.
"""
# Convert to row based if in columnar or mixed format
row_input_data = compatibility_convert_row_columnar_dataset(input_data, None, DatasetType.input)

# A deep copy is made of the input data, since default values will be added in the validation process
input_data_copy = copy.deepcopy(input_data)
input_data_copy = copy.deepcopy(row_input_data)

assert_valid_data_structure(input_data_copy, DatasetType.input)

errors: list[ValidationError] = []
Expand Down Expand Up @@ -131,8 +135,13 @@ def validate_batch_data(

input_errors: list[ValidationError] = list(validate_unique_ids_across_components(input_data))

# Convert to row based if in columnar format
# TODO(figueroa1395): transform to columnar per single batch scenario once the columnar dataset python extension
# is finished
row_update_data = compatibility_convert_row_columnar_dataset(update_data, None, DatasetType.update)

# Splitting update_data into batches may raise TypeErrors and ValueErrors
batch_data = convert_batch_dataset_to_batch_list(update_data)
batch_data = convert_batch_dataset_to_batch_list(row_update_data)

errors = {}
for batch, batch_update_data in enumerate(batch_data):
Expand Down
15 changes: 13 additions & 2 deletions tests/unit/validation/test_batch_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import numpy as np
import pytest

from power_grid_model import LoadGenType, initialize_array
from power_grid_model import DatasetType, LoadGenType, initialize_array
from power_grid_model._utils import compatibility_convert_row_columnar_dataset
from power_grid_model.validation import validate_batch_data
from power_grid_model.validation.errors import MultiComponentNotUniqueError, NotBooleanError

Expand Down Expand Up @@ -41,7 +42,7 @@ def input_data() -> dict[str, np.ndarray]:


@pytest.fixture
def batch_data() -> dict[str, np.ndarray]:
def original_batch_data() -> dict[str, np.ndarray]:
line = initialize_array("update", "line", (3, 2))
line["id"] = [[5, 6], [6, 7], [7, 5]]
line["from_status"] = [[1, 1], [1, 1], [1, 1]]
Expand All @@ -53,6 +54,16 @@ def batch_data() -> dict[str, np.ndarray]:
return {"line": line, "asym_load": asym_load}


@pytest.fixture
def original_batch_data_columnar(original_batch_data):
return compatibility_convert_row_columnar_dataset(original_batch_data, Ellipsis, DatasetType.update)


@pytest.fixture(params=["original_batch_data", "original_batch_data_columnar"])
def batch_data(request):
return request.getfixturevalue(request.param)


def test_validate_batch_data(input_data, batch_data):
errors = validate_batch_data(input_data, batch_data)
assert not errors
Expand Down
13 changes: 12 additions & 1 deletion tests/unit/validation/test_input_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
WindingType,
initialize_array,
)
from power_grid_model._utils import compatibility_convert_row_columnar_dataset
from power_grid_model.enum import CalculationType, FaultPhase, FaultType
from power_grid_model.validation import validate_input_data
from power_grid_model.validation.errors import (
Expand All @@ -39,7 +40,7 @@


@pytest.fixture
def input_data() -> dict[ComponentType, np.ndarray]:
def original_data() -> dict[ComponentType, np.ndarray]:
node = initialize_array(DatasetType.input, ComponentType.node, 4)
node["id"] = [0, 2, 1, 2]
node["u_rated"] = [10.5e3, 10.5e3, 0, 10.5e3]
Expand Down Expand Up @@ -274,6 +275,16 @@ def input_data() -> dict[ComponentType, np.ndarray]:
return data


@pytest.fixture
def original_data_columnar(original_data):
return compatibility_convert_row_columnar_dataset(original_data, Ellipsis, DatasetType.input)


@pytest.fixture(params=["original_data", "original_data_columnar"])
def input_data(request):
return request.getfixturevalue(request.param)


def test_validate_input_data_sym_calculation(input_data):
validation_errors = validate_input_data(input_data, symmetric=True)

Expand Down
Loading