From 00120b1d3e74096da4f0e9875b2da51b76b4563a Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sun, 6 Oct 2024 15:30:52 +0200 Subject: [PATCH 01/10] test(enums): fix doctests (#1267) --- openfisca_core/indexed_enums/enum.py | 170 ++++++++++++++++-- openfisca_core/indexed_enums/enum_array.py | 136 ++++++++++++-- .../indexed_enums/tests/__init__.py | 0 .../indexed_enums/tests/test_enum.py | 152 ++++++++++++++++ .../indexed_enums/tests/test_enum_array.py | 30 ++++ 5 files changed, 462 insertions(+), 26 deletions(-) create mode 100644 openfisca_core/indexed_enums/tests/__init__.py create mode 100644 openfisca_core/indexed_enums/tests/test_enum.py create mode 100644 openfisca_core/indexed_enums/tests/test_enum_array.py diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 56f007941e..3aa633eb33 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -13,6 +13,69 @@ class Enum(t.Enum): Its items have an :class:`int` index, useful and performant when running :mod:`~openfisca_core.simulations` on large :mod:`~openfisca_core.populations`. + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... HOMELESS = "Homeless" + + >>> repr(Housing) + "" + + >>> repr(Housing.TENANT) + "" + + >>> str(Housing.TENANT) + 'Housing.TENANT' + + >>> dict([(Housing.TENANT, Housing.TENANT.value)]) + {: 'Tenant'} + + >>> list(Housing) + [, , ...] + + >>> Housing["TENANT"] + + + >>> Housing("Tenant") + + + >>> Housing.TENANT in Housing + True + + >>> len(Housing) + 4 + + >>> Housing.TENANT == Housing.TENANT + True + + >>> Housing.TENANT != Housing.TENANT + False + + >>> Housing.TENANT > Housing.TENANT + False + + >>> Housing.TENANT < Housing.TENANT + False + + >>> Housing.TENANT >= Housing.TENANT + True + + >>> Housing.TENANT <= Housing.TENANT + True + + >>> Housing.TENANT.index + 1 + + >>> Housing.TENANT.name + 'TENANT' + + >>> Housing.TENANT.value + 'Tenant' + """ #: The :attr:`index` of the :class:`.Enum` member. @@ -28,14 +91,61 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: *__args: Positional arguments. **__kwargs: Keyword arguments. + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> Housing = enum.Enum("Housing", "owner tenant") + >>> Housing.tenant.index + 1 + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> Housing.TENANT.index + 1 + + >>> array = numpy.array([[1, 2], [3, 4]]) + >>> array[Housing.TENANT.index] + array([3, 4]) + Note: ``_member_names_`` is undocumented in upstream :class:`enum.Enum`. """ self.index = len(self._member_names_) - #: Bypass the slow :meth:`enum.Enum.__eq__` method. - __eq__ = object.__eq__ + def __eq__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index == other.index + + def __ne__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index != other.index + + def __lt__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index < other.index + + def __le__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index <= other.index + + def __gt__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index > other.index + + def __ge__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index >= other.index #: :meth:`.__hash__` must also be defined so as to stay hashable. __hash__ = object.__hash__ @@ -53,19 +163,53 @@ def encode( Returns: EnumArray: An :class:`.EnumArray` with the encoded input values. - For instance: + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + # EnumArray + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> Housing.encode(enum_array) + EnumArray([]) + + # Array of Enum + + >>> array = numpy.array([Housing.TENANT]) + >>> enum_array = Housing.encode(array) + >>> enum_array[0] == Housing.TENANT.index + True + + # Array of integers + + >>> array = numpy.array([1]) + >>> enum_array = Housing.encode(array) + >>> enum_array[0] == Housing.TENANT.index + True + + # Array of bytes + + >>> array = numpy.array([b"TENANT"]) + >>> enum_array = Housing.encode(array) + >>> enum_array[0] == Housing.TENANT.index + True + + # Array of strings + + >>> array = numpy.array(["TENANT"]) + >>> enum_array = Housing.encode(array) + >>> enum_array[0] == Housing.TENANT.index + True - >>> string_identifier_array = asarray(["free_lodger", "owner"]) - >>> encoded_array = HousingOccupancyStatus.encode(string_identifier_array) - >>> encoded_array[0] - 2 # Encoded value + .. seealso:: + :meth:`.EnumArray.decode` for decoding. - >>> free_lodger = HousingOccupancyStatus.free_lodger - >>> owner = HousingOccupancyStatus.owner - >>> enum_item_array = asarray([free_lodger, owner]) - >>> encoded_array = HousingOccupancyStatus.encode(enum_item_array) - >>> encoded_array[0] - 2 # Encoded value """ if isinstance(array, EnumArray): return array diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 93c8486989..334083dd3e 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -18,6 +18,52 @@ class EnumArray(t.EnumArray): about the :meth:`.__new__` and :meth:`.__array_finalize__` methods below, see `Subclassing ndarray`_. + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum, variables + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... HOMELESS = "Homeless" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + + >>> repr(enum.EnumArray) + "" + + >>> repr(enum_array) + "EnumArray([])" + + >>> str(enum_array) + "['TENANT']" + + >>> list(enum_array) + [1] + + >>> enum_array[0] + 1 + + >>> enum_array[0] in enum_array + True + + >>> len(enum_array) + 1 + + >>> enum_array = enum.EnumArray(list(Housing), Housing) + >>> enum_array[Housing.TENANT.index] + + + >>> class OccupancyStatus(variables.Variable): + ... value_type = enum.Enum + ... possible_values = Housing + + >>> enum.EnumArray(array, OccupancyStatus.possible_values) + EnumArray([]) + .. _Subclassing ndarray: https://numpy.org/doc/stable/user/basics.subclassing.html @@ -59,6 +105,33 @@ def __eq__(self, other: object) -> bool: bool: When ??? ndarray[bool_]: When ??? + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + + >>> enum_array == 1 + array([ True]) + + >>> enum_array == [1] + array([ True]) + + >>> enum_array == [2] + array([False]) + + >>> enum_array == "1" + array([False]) + + >>> enum_array is None + False + Note: This breaks the `Liskov substitution principle`_. @@ -81,6 +154,33 @@ def __ne__(self, other: object) -> bool: bool: When ??? ndarray[bool_]: When ??? + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + + >>> enum_array != 1 + array([False]) + + >>> enum_array != [1] + array([False]) + + >>> enum_array != [2] + array([ True]) + + >>> enum_array != "1" + array([ True]) + + >>> enum_array is not None + True + Note: This breaks the `Liskov substitution principle`_. @@ -115,15 +215,19 @@ def decode(self) -> numpy.object_: Returns: ndarray[Enum]: The items of the :obj:`.EnumArray`. - For instance: + Examples: + >>> import numpy - >>> enum_array = household("housing_occupancy_status", period) - >>> enum_array[0] - >>> 2 # Encoded value - >>> enum_array.decode()[0] - + >>> from openfisca_core import indexed_enums as enum - Decoded value: enum item + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> enum_array.decode() + array([], dtype=object) """ return numpy.select( @@ -137,13 +241,19 @@ def decode_to_str(self) -> numpy.str_: Returns: ndarray[str_]: The string values of the :obj:`.EnumArray`. - For instance: + Examples: + >>> import numpy - >>> enum_array = household("housing_occupancy_status", period) - >>> enum_array[0] - >>> 2 # Encoded value - >>> enum_array.decode_to_str()[0] - 'free_lodger' # String identifier + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> enum_array.decode_to_str() + array(['TENANT'], dtype=' Date: Mon, 7 Oct 2024 15:37:55 +0200 Subject: [PATCH 02/10] test(enums): fix Enum.decode (#1267) --- openfisca_core/indexed_enums/enum.py | 93 ++++++++---- .../indexed_enums/tests/test_enum.py | 140 ++++++++---------- openfisca_core/indexed_enums/types.py | 22 ++- 3 files changed, 148 insertions(+), 107 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 3aa633eb33..b3f7f1dbb6 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -153,7 +153,16 @@ def __ge__(self, other: object) -> bool: @classmethod def encode( cls, - array: EnumArray | numpy.int32 | numpy.float32 | numpy.object_, + array: ( + EnumArray + | t.Array[t.DTypeStr] + | t.Array[t.DTypeInt] + | t.Array[t.DTypeEnum] + | t.Array[t.DTypeObject] + | t.ArrayLike[str] + | t.ArrayLike[int] + | t.ArrayLike[t.Enum] + ), ) -> EnumArray: """Encode an encodable array into an :class:`.EnumArray`. @@ -163,6 +172,11 @@ def encode( Returns: EnumArray: An :class:`.EnumArray` with the encoded input values. + Raises: + TypeError: If ``array`` is a scalar :class:`~numpy.ndarray`. + TypeError: If ``array`` is of a diffent :class:`.Enum` type. + NotImplementedError: If ``array`` is of an unsupported type. + Examples: >>> import numpy @@ -183,29 +197,29 @@ def encode( >>> array = numpy.array([Housing.TENANT]) >>> enum_array = Housing.encode(array) - >>> enum_array[0] == Housing.TENANT.index - True + >>> enum_array == Housing.TENANT + array([ True]) # Array of integers >>> array = numpy.array([1]) >>> enum_array = Housing.encode(array) - >>> enum_array[0] == Housing.TENANT.index - True + >>> enum_array == Housing.TENANT + array([ True]) - # Array of bytes + # Array of strings - >>> array = numpy.array([b"TENANT"]) + >>> array = numpy.array(["TENANT"]) >>> enum_array = Housing.encode(array) >>> enum_array[0] == Housing.TENANT.index True - # Array of strings + # Array of bytes - >>> array = numpy.array(["TENANT"]) + >>> array = numpy.array([b"TENANT"]) >>> enum_array = Housing.encode(array) - >>> enum_array[0] == Housing.TENANT.index - True + Traceback (most recent call last): + NotImplementedError: Unsupported encoding: bytes48. .. seealso:: :meth:`.EnumArray.decode` for decoding. @@ -214,15 +228,36 @@ def encode( if isinstance(array, EnumArray): return array + if not isinstance(array, numpy.ndarray): + return cls.encode(numpy.array(array)) + + if array.size == 0: + return EnumArray(array, cls) + + if array.ndim == 0: + msg = ( + "Scalar arrays are not supported: expecting a vector array, " + f"instead. Please try again with `numpy.array([{array}])`." + ) + raise TypeError(msg) + + # Enum data type array + if numpy.issubdtype(array.dtype, t.DTypeEnum): + indexes = numpy.array([item.index for item in cls], t.DTypeEnum) + return EnumArray(indexes[array[array < indexes.size]], cls) + + # Integer array + if numpy.issubdtype(array.dtype, int): + array = numpy.array(array, dtype=t.DTypeEnum) + return cls.encode(array) + # String array - if isinstance(array, numpy.ndarray) and array.dtype.kind in {"U", "S"}: - array = numpy.select( - [array == item.name for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) + if numpy.issubdtype(array.dtype, t.DTypeStr): + enums = [cls.__members__[key] for key in array if key in cls.__members__] + return cls.encode(enums) # Enum items arrays - elif isinstance(array, numpy.ndarray) and array.dtype.kind == "O": + if numpy.issubdtype(array.dtype, t.DTypeObject): # Ensure we are comparing the comparable. The problem this fixes: # On entering this method "cls" will generally come from # variable.possible_values, while the array values may come from @@ -234,15 +269,21 @@ def encode( # So, instead of relying on the "cls" passed in, we use only its # name to check that the values in the array, if non-empty, are of # the right type. - if len(array) > 0 and cls.__name__ is array[0].__class__.__name__: - cls = array[0].__class__ - - array = numpy.select( - [array == item for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) - - return EnumArray(array, cls) + if cls.__name__ is array[0].__class__.__name__: + array = numpy.select( + [array == item for item in array[0].__class__], + [item.index for item in array[0].__class__], + ).astype(ENUM_ARRAY_DTYPE) + return EnumArray(array, cls) + + msg = ( + f"Diverging enum types are not supported: expected {cls.__name__}, " + f"but got {array[0].__class__.__name__} instead." + ) + raise TypeError(msg) + + msg = f"Unsupported encoding: {array.dtype.name}." + raise NotImplementedError(msg) __all__ = ["Enum"] diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 55aa8208a4..059918f7d0 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -1,4 +1,5 @@ import numpy +import pytest from openfisca_core import indexed_enums as enum @@ -8,145 +9,126 @@ class Animal(enum.Enum): DOG = b"Dog" +class Colour(enum.Enum): + INCARNADINE = "incarnadine" + TURQUOISE = "turquoise" + AMARANTH = "amaranth" + + # Arrays of Enum -def test_enum_encode_with_enum_scalar_array(): - """Encode when called with an enum scalar array.""" - array = numpy.array(Animal.DOG) +def test_enum_encode_with_array_of_enum(): + """Does encode when called with an array of enums.""" + array = numpy.array([Animal.DOG]) enum_array = Animal.encode(array) - assert enum_array == Animal.DOG.index + assert enum_array == Animal.DOG def test_enum_encode_with_enum_sequence(): - """Does not encode when called with an enum sequence.""" + """Does encode when called with an enum sequence.""" sequence = list(Animal) enum_array = Animal.encode(sequence) - assert enum_array[0] != Animal.DOG.index - - -def test_enum_encode_with_enum_scalar(): - """Does not encode when called with an enum scalar.""" - scalar = Animal.DOG - enum_array = Animal.encode(scalar) - assert enum_array != Animal.DOG.index - - -# Arrays of int + assert Animal.DOG in enum_array -def test_enum_encode_with_int_scalar_array(): - """Does not encode when called with an int scalar array (noop).""" - array = numpy.array(1) - enum_array = Animal.encode(array) - assert enum_array == Animal.DOG.index +def test_enum_encode_with_enum_scalar_array(): + """Does not encode when called with an enum scalar array.""" + array = numpy.array(Animal.DOG) + with pytest.raises(TypeError): + Animal.encode(array) -def test_enum_encode_with_int_sequence(): - """Does not encode when called with an int sequence (noop).""" - sequence = range(1, 2) - enum_array = Animal.encode(sequence) - assert enum_array[0] == Animal.DOG.index - +def test_enum_encode_with_enum_with_bad_value(): + """Does not encode when called with a value not in an Enum.""" + array = numpy.array([Colour.AMARANTH]) + with pytest.raises(TypeError): + Animal.encode(array) -def test_enum_encode_with_int_scalar(): - """Does not encode when called with an int scalar (noop).""" - scalar = 1 - enum_array = Animal.encode(scalar) - assert enum_array == Animal.DOG.index +# Arrays of int -# Arrays of bytes -def test_enum_encode_with_bytes_scalar_array(): - """Encode when called with a bytes scalar array.""" - array = numpy.array(b"DOG") +def test_enum_encode_with_array_of_int(): + """Does encode when called with an array of int.""" + array = numpy.array([1]) enum_array = Animal.encode(array) - assert enum_array == Animal.DOG.index + assert enum_array == Animal.DOG -def test_enum_encode_with_bytes_sequence(): - """Does not encode when called with a bytes sequence.""" - sequence = bytearray(b"DOG") +def test_enum_encode_with_int_sequence(): + """Does encode when called with an int sequence.""" + sequence = (1, 2) enum_array = Animal.encode(sequence) - assert enum_array[0] != Animal.DOG.index + assert Animal.DOG in enum_array -def test_enum_encode_with_bytes_scalar(): - """Does not encode when called with a bytes scalar.""" - scalar = b"DOG" - enum_array = Animal.encode(scalar) - assert enum_array != Animal.DOG.index +def test_enum_encode_with_int_scalar_array(): + """Does not encode when called with an int scalar array.""" + array = numpy.array(1) + with pytest.raises(TypeError): + Animal.encode(array) -def test_enum_encode_with_bytes_with_bad_value(): +def test_enum_encode_with_int_with_bad_value(): """Does not encode when called with a value not in an Enum.""" - array = numpy.array([b"IGUANA"]) + array = numpy.array([2]) enum_array = Animal.encode(array) - assert enum_array != Animal.CAT.index - assert enum_array != Animal.DOG.index + assert len(enum_array) == 0 # Arrays of strings -def test_enum_encode_with_str_scalar_array(): - """Encode when called with a str scalar array.""" - array = numpy.array("DOG") +def test_enum_encode_with_array_of_string(): + """Does encode when called with an array of string.""" + array = numpy.array(["DOG"]) enum_array = Animal.encode(array) - assert enum_array == Animal.DOG.index + assert enum_array == Animal.DOG def test_enum_encode_with_str_sequence(): - """Does not encode when called with a str sequence.""" + """Does encode when called with a str sequence.""" sequence = ("DOG",) enum_array = Animal.encode(sequence) - assert enum_array[0] != Animal.DOG.index + assert Animal.DOG in enum_array -def test_enum_encode_with_str_scalar(): - """Does not encode when called with a str scalar.""" - scalar = "DOG" - enum_array = Animal.encode(scalar) - assert enum_array != Animal.DOG.index +def test_enum_encode_with_str_scalar_array(): + """Does not encode when called with a str scalar array.""" + array = numpy.array("DOG") + with pytest.raises(TypeError): + Animal.encode(array) def test_enum_encode_with_str_with_bad_value(): """Does not encode when called with a value not in an Enum.""" array = numpy.array(["JAIBA"]) enum_array = Animal.encode(array) - assert enum_array != Animal.CAT.index - assert enum_array != Animal.DOG.index + assert len(enum_array) == 0 # Unsupported encodings def test_enum_encode_with_any_array(): - """Does not encode when called with unsupported types (noop).""" + """Does not encode when called with unsupported types.""" value = {"animal": "dog"} array = numpy.array([value]) - enum_array = Animal.encode(array) - assert enum_array[0] == value + with pytest.raises(TypeError): + Animal.encode(array) def test_enum_encode_with_any_scalar_array(): - """Does not encode when called with unsupported types (noop).""" + """Does not encode when called with unsupported types.""" value = 1.5 array = numpy.array(value) - enum_array = Animal.encode(array) - assert enum_array == value + with pytest.raises(TypeError): + Animal.encode(array) def test_enum_encode_with_any_sequence(): - """Does not encode when called with unsupported types (noop).""" + """Does not encode when called with unsupported types.""" sequence = memoryview(b"DOG") - enum_array = Animal.encode(sequence) - assert enum_array[0] == sequence[0] - - -def test_enum_encode_with_anything(): - """Does not encode when called with unsupported types (noop).""" - anything = {object()} - enum_array = Animal.encode(anything) - assert enum_array == anything + with pytest.raises(NotImplementedError): + Animal.encode(sequence) diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index d69eb098a0..ab4283b75d 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,3 +1,21 @@ -from openfisca_core.types import Array, DTypeEnum, Enum, EnumArray +from openfisca_core.types import ( + Array, + ArrayLike, + DTypeEnum, + DTypeInt, + DTypeObject, + DTypeStr, + Enum, + EnumArray, +) -__all__ = ["Array", "DTypeEnum", "Enum", "EnumArray"] +__all__ = [ + "Array", + "ArrayLike", + "DTypeEnum", + "DTypeInt", + "DTypeObject", + "DTypeStr", + "Enum", + "EnumArray", +] From 0195451bc67426bee0077d7a5f0499c4b81f863d Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Mon, 7 Oct 2024 15:56:51 +0200 Subject: [PATCH 03/10] refactor(enums): remove magic methods (#1267) --- openfisca_core/indexed_enums/enum.py | 32 ---------------------------- setup.cfg | 2 +- 2 files changed, 1 insertion(+), 33 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index b3f7f1dbb6..3909e38ea5 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -55,18 +55,6 @@ class Enum(t.Enum): >>> Housing.TENANT != Housing.TENANT False - >>> Housing.TENANT > Housing.TENANT - False - - >>> Housing.TENANT < Housing.TENANT - False - - >>> Housing.TENANT >= Housing.TENANT - True - - >>> Housing.TENANT <= Housing.TENANT - True - >>> Housing.TENANT.index 1 @@ -127,26 +115,6 @@ def __ne__(self, other: object) -> bool: return NotImplemented return self.index != other.index - def __lt__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index < other.index - - def __le__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index <= other.index - - def __gt__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index > other.index - - def __ge__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index >= other.index - #: :meth:`.__hash__` must also be defined so as to stay hashable. __hash__ = object.__hash__ diff --git a/setup.cfg b/setup.cfg index fd18e5ab3f..9664127cfd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,7 +49,7 @@ disable = all enable = C0115, C0116, R0401 per-file-ignores = types.py:C0115,C0116 - /tests/:C0116 + /tests/:C0115,C0116 score = no [isort] From b5b79685046a72b3375b68eb71b0a8b803e2138b Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 02:38:27 +0200 Subject: [PATCH 04/10] fix(enums): do actual indexing (#1267) --- openfisca_core/indexed_enums/__init__.py | 2 + openfisca_core/indexed_enums/_enum_type.py | 113 ++++++++++++++++++ openfisca_core/indexed_enums/_type_guards.py | 68 +++++++++++ openfisca_core/indexed_enums/enum.py | 91 ++++++-------- openfisca_core/indexed_enums/enum_array.py | 2 +- .../indexed_enums/tests/test_enum.py | 8 +- openfisca_core/indexed_enums/types.py | 51 ++++++-- openfisca_core/types.py | 10 +- tests/core/tools/test_assert_near.py | 2 +- 9 files changed, 279 insertions(+), 68 deletions(-) create mode 100644 openfisca_core/indexed_enums/_enum_type.py create mode 100644 openfisca_core/indexed_enums/_type_guards.py diff --git a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index a6a452511f..10bbd3d980 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -1,6 +1,7 @@ """Enumerations for variables with a limited set of possible values.""" from . import types +from ._enum_type import EnumType from .config import ENUM_ARRAY_DTYPE from .enum import Enum from .enum_array import EnumArray @@ -9,5 +10,6 @@ "ENUM_ARRAY_DTYPE", "Enum", "EnumArray", + "EnumType", "types", ] diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py new file mode 100644 index 0000000000..0b2f26b352 --- /dev/null +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +from typing import final + +import numpy + +from . import types as t + + +def _item_list(enum_class: type[t.Enum]) -> t.ItemList: + """Return the non-vectorised list of enum items.""" + return [ + (index, name, value) + for index, (name, value) in enumerate(enum_class.__members__.items()) + ] + + +def _item_dtype(enum_class: type[t.Enum]) -> t.RecDType: + """Return the dtype of the indexed enum's items.""" + size = max(map(len, enum_class.__members__.keys())) + return numpy.dtype( + ( + numpy.generic, + { + "index": (t.EnumDType, 0), + "name": (f"U{size}", 2), + "enum": (enum_class, 2 + size * 4), + }, + ) + ) + + +def _item_array(enum_class: type[t.Enum]) -> t.RecArray: + """Return the indexed enum's items.""" + items = _item_list(enum_class) + dtype = _item_dtype(enum_class) + array = numpy.array(items, dtype=dtype) + return array.view(numpy.recarray) + + +@final +class EnumType(t.EnumType): + """Meta class for creating an indexed :class:`.Enum`. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Enum(enum.Enum, metaclass=enum.EnumType): + ... pass + + >>> Enum.items + Traceback (most recent call last): + AttributeError: type object 'Enum' has no attribute 'items' + + >>> class Housing(Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> Housing.items + rec.array([(0, 'OWNER', ), ...]) + + >>> Housing.indices + array([0, 1], dtype=int16) + + >>> Housing.names + array(['OWNER', 'TENANT'], dtype='>> Housing.enums + array([, ], dtype...) + + """ + + #: The items of the indexed enum class. + items: t.RecArray + + @property + def indices(cls) -> t.IndexArray: + """Return the indices of the indexed enum class.""" + return cls.items.index + + @property + def names(cls) -> t.StrArray: + """Return the names of the indexed enum class.""" + return cls.items.name + + @property + def enums(cls) -> t.ObjArray: + """Return the members of the indexed enum class.""" + return cls.items.enum + + def __new__( + metacls, + cls: str, + bases: tuple[type, ...], + classdict: t.EnumDict, + **kwds: object, + ) -> t.EnumType: + """Create a new indexed enum class.""" + # Create the enum class. + enum_class = super().__new__(metacls, cls, bases, classdict, **kwds) + + # If the enum class has no members, return it as is. + if not enum_class.__members__: + return enum_class + + # Add the items attribute to the enum class. + enum_class.items = _item_array(enum_class) + + # Return the modified enum class. + return enum_class + + def __dir__(cls) -> list[str]: + return sorted({"items", "indices", "names", "enums", *super().__dir__()}) diff --git a/openfisca_core/indexed_enums/_type_guards.py b/openfisca_core/indexed_enums/_type_guards.py new file mode 100644 index 0000000000..3caf1859b3 --- /dev/null +++ b/openfisca_core/indexed_enums/_type_guards.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from typing_extensions import TypeIs + +import numpy + +from . import types as t + + +def _is_int_array(array: t.AnyArray) -> TypeIs[t.IndexArray | t.IntArray]: + """Narrow the type of a given array to an array of :obj:`numpy.integer`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array of :obj:`numpy.integer`, False otherwise. + + Examples: + >>> import numpy + + >>> array = numpy.array([1], dtype=numpy.int16) + >>> _is_int_array(array) + True + + >>> array = numpy.array([1], dtype=numpy.int32) + >>> _is_int_array(array) + True + + >>> array = numpy.array([1.0]) + >>> _is_int_array(array) + False + + """ + return numpy.issubdtype(array.dtype, numpy.integer) + + +def _is_str_array(array: t.AnyArray) -> TypeIs[t.StrArray]: + """Narrow the type of a given array to an array of :obj:`numpy.str_`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array of :obj:`numpy.str_`, False otherwise. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "owner" + ... TENANT = "tenant" + + >>> array = numpy.array([Housing.OWNER]) + >>> _is_str_array(array) + False + + >>> array = numpy.array(["owner"]) + >>> _is_str_array(array) + True + + """ + return numpy.issubdtype(array.dtype, str) + + +__all__ = ["_is_int_array", "_is_str_array"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 3909e38ea5..069cc8fa98 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -3,11 +3,12 @@ import numpy from . import types as t -from .config import ENUM_ARRAY_DTYPE +from ._enum_type import EnumType +from ._type_guards import _is_int_array, _is_str_array from .enum_array import EnumArray -class Enum(t.Enum): +class Enum(t.Enum, metaclass=EnumType): """Enum based on `enum34 `_. Its items have an :class:`int` index, useful and performant when running @@ -115,20 +116,19 @@ def __ne__(self, other: object) -> bool: return NotImplemented return self.index != other.index - #: :meth:`.__hash__` must also be defined so as to stay hashable. - __hash__ = object.__hash__ + def __hash__(self) -> int: + return hash(self.index) @classmethod def encode( cls, array: ( EnumArray - | t.Array[t.DTypeStr] - | t.Array[t.DTypeInt] - | t.Array[t.DTypeEnum] - | t.Array[t.DTypeObject] - | t.ArrayLike[str] + | t.IntArray + | t.StrArray + | t.ObjArray | t.ArrayLike[int] + | t.ArrayLike[str] | t.ArrayLike[t.Enum] ), ) -> EnumArray: @@ -143,7 +143,6 @@ def encode( Raises: TypeError: If ``array`` is a scalar :class:`~numpy.ndarray`. TypeError: If ``array`` is of a diffent :class:`.Enum` type. - NotImplementedError: If ``array`` is of an unsupported type. Examples: >>> import numpy @@ -187,7 +186,7 @@ def encode( >>> array = numpy.array([b"TENANT"]) >>> enum_array = Housing.encode(array) Traceback (most recent call last): - NotImplementedError: Unsupported encoding: bytes48. + TypeError: Failed to encode "[b'TENANT']" of type 'bytes_', as i... .. seealso:: :meth:`.EnumArray.decode` for decoding. @@ -200,7 +199,7 @@ def encode( return cls.encode(numpy.array(array)) if array.size == 0: - return EnumArray(array, cls) + return EnumArray(numpy.array([]), cls) if array.ndim == 0: msg = ( @@ -209,49 +208,37 @@ def encode( ) raise TypeError(msg) - # Enum data type array - if numpy.issubdtype(array.dtype, t.DTypeEnum): - indexes = numpy.array([item.index for item in cls], t.DTypeEnum) - return EnumArray(indexes[array[array < indexes.size]], cls) - # Integer array - if numpy.issubdtype(array.dtype, int): - array = numpy.array(array, dtype=t.DTypeEnum) - return cls.encode(array) + if _is_int_array(array): + indices = numpy.array(array[array < len(cls.items)], dtype=t.EnumDType) + return EnumArray(indices, cls) # String array - if numpy.issubdtype(array.dtype, t.DTypeStr): - enums = [cls.__members__[key] for key in array if key in cls.__members__] - return cls.encode(enums) - - # Enum items arrays - if numpy.issubdtype(array.dtype, t.DTypeObject): - # Ensure we are comparing the comparable. The problem this fixes: - # On entering this method "cls" will generally come from - # variable.possible_values, while the array values may come from - # directly importing a module containing an Enum class. However, - # variables (and hence their possible_values) are loaded by a call - # to load_module, which gives them a different identity from the - # ones imported in the usual way. - # - # So, instead of relying on the "cls" passed in, we use only its - # name to check that the values in the array, if non-empty, are of - # the right type. - if cls.__name__ is array[0].__class__.__name__: - array = numpy.select( - [array == item for item in array[0].__class__], - [item.index for item in array[0].__class__], - ).astype(ENUM_ARRAY_DTYPE) - return EnumArray(array, cls) - - msg = ( - f"Diverging enum types are not supported: expected {cls.__name__}, " - f"but got {array[0].__class__.__name__} instead." - ) - raise TypeError(msg) - - msg = f"Unsupported encoding: {array.dtype.name}." - raise NotImplementedError(msg) + if _is_str_array(array): + indices = cls.items[numpy.isin(cls.names, array)].index + return EnumArray(indices, cls) + + # Ensure we are comparing the comparable. The problem this fixes: + # On entering this method "cls" will generally come from + # variable.possible_values, while the array values may come from + # directly importing a module containing an Enum class. However, + # variables (and hence their possible_values) are loaded by a call + # to load_module, which gives them a different identity from the + # ones imported in the usual way. + # + # So, instead of relying on the "cls" passed in, we use only its + # name to check that the values in the array, if non-empty, are of + # the right type. + if cls.__name__ is array[0].__class__.__name__: + indices = cls.items[numpy.isin(cls.enums, array)].index + return EnumArray(indices, cls) + + msg = ( + f"Failed to encode \"{array}\" of type '{array[0].__class__.__name__}', " + "as it is not supported. Please, try again with an array of " + f"'{int.__name__}', '{str.__name__}', or '{cls.__name__}'." + ) + raise TypeError(msg) __all__ = ["Enum"] diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 334083dd3e..06fc1fbc93 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -74,7 +74,7 @@ class EnumArray(t.EnumArray): def __new__( cls, - input_array: t.Array[t.DTypeEnum], + input_array: t.IndexArray, possible_values: None | type[t.Enum] = None, ) -> Self: """See comment above.""" diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 059918f7d0..c77b9ddac2 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -27,7 +27,7 @@ def test_enum_encode_with_array_of_enum(): def test_enum_encode_with_enum_sequence(): """Does encode when called with an enum sequence.""" - sequence = list(Animal) + sequence = list(Animal) + list(Colour) enum_array = Animal.encode(sequence) assert Animal.DOG in enum_array @@ -89,7 +89,7 @@ def test_enum_encode_with_array_of_string(): def test_enum_encode_with_str_sequence(): """Does encode when called with a str sequence.""" - sequence = ("DOG",) + sequence = ("DOG", "JAIBA") enum_array = Animal.encode(sequence) assert Animal.DOG in enum_array @@ -130,5 +130,5 @@ def test_enum_encode_with_any_scalar_array(): def test_enum_encode_with_any_sequence(): """Does not encode when called with unsupported types.""" sequence = memoryview(b"DOG") - with pytest.raises(NotImplementedError): - Animal.encode(sequence) + enum_array = Animal.encode(sequence) + assert len(enum_array) == 0 diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index ab4283b75d..a16b037509 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,21 +1,56 @@ +from typing import Any +from typing_extensions import TypeAlias + from openfisca_core.types import ( Array, ArrayLike, - DTypeEnum, - DTypeInt, - DTypeObject, - DTypeStr, + DTypeEnum as EnumDType, + DTypeGeneric as AnyDType, + DTypeInt as IntDType, + DTypeLike, + DTypeObject as ObjDType, + DTypeStr as StrDType, Enum, EnumArray, + EnumType, ) +import enum + +import numpy + +#: Type for enum dicts. +EnumDict: TypeAlias = enum._EnumDict # noqa: SLF001 + +#: Type for the non-vectorised list of enum items. +ItemList: TypeAlias = list[tuple[int, str, Enum]] + +#: Type for record arrays data type. +RecDType: TypeAlias = numpy.dtype[numpy.void] + +#: Type for record arrays. +RecArray: TypeAlias = numpy.recarray[object, Any] + +#: Type for enum indices arrays. +IndexArray: TypeAlias = Array[EnumDType] + +#: Type for int arrays. +IntArray: TypeAlias = Array[IntDType] + +#: Type for str arrays. +StrArray: TypeAlias = Array[StrDType] + +#: Type for object arrays. +ObjArray: TypeAlias = Array[ObjDType] + +#: Type for generic arrays. +AnyArray: TypeAlias = Array[AnyDType] + __all__ = [ "Array", "ArrayLike", - "DTypeEnum", - "DTypeInt", - "DTypeObject", - "DTypeStr", + "DTypeLike", "Enum", "EnumArray", + "EnumType", ] diff --git a/openfisca_core/types.py b/openfisca_core/types.py index b922cde092..d81a0789a9 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Iterable, Sequence, Sized -from numpy.typing import NDArray +from numpy.typing import DTypeLike, NDArray from typing import Any, NewType, TypeVar, Union from typing_extensions import Protocol, Self, TypeAlias @@ -108,7 +108,10 @@ def plural(self, /) -> None | RolePlural: ... # Indexed enums -class Enum(enum.Enum, metaclass=enum.EnumMeta): +class EnumType(enum.EnumMeta): ... + + +class Enum(enum.Enum, metaclass=EnumType): index: int @@ -239,3 +242,6 @@ def __call__( class Params(Protocol): def __call__(self, instant: Instant, /) -> ParameterNodeAtInstant: ... + + +__all__ = ["DTypeLike"] diff --git a/tests/core/tools/test_assert_near.py b/tests/core/tools/test_assert_near.py index c351be0f9c..bdcb589b44 100644 --- a/tests/core/tools/test_assert_near.py +++ b/tests/core/tools/test_assert_near.py @@ -21,5 +21,5 @@ def test_enum_2(tax_benefit_system) -> None: "housing_occupancy_status" ].possible_values value = possible_values.encode(numpy.array(["tenant", "owner"])) - expected_value = ["tenant", "owner"] + expected_value = ["owner", "tenant"] assert_near(value, expected_value) From cbc7fdb19cb257e6b244479a611ac5b81f26db34 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 02:52:18 +0200 Subject: [PATCH 05/10] build(test): add enums to test path (#1267) --- openfisca_tasks/test_code.mk | 1 + setup.cfg | 1 + 2 files changed, 2 insertions(+) diff --git a/openfisca_tasks/test_code.mk b/openfisca_tasks/test_code.mk index ed2d435ed9..f2ab7247ab 100644 --- a/openfisca_tasks/test_code.mk +++ b/openfisca_tasks/test_code.mk @@ -40,6 +40,7 @@ test-core: $(shell git ls-files "*test_*.py") openfisca_core/data_storage \ openfisca_core/entities \ openfisca_core/holders \ + openfisca_core/indexed_enums \ openfisca_core/periods \ openfisca_core/projectors @PYTEST_ADDOPTS="$${PYTEST_ADDOPTS} ${pytest_args}" \ diff --git a/setup.cfg b/setup.cfg index 9664127cfd..60ac8faf07 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,7 @@ include-in-doctest = openfisca_core/commons openfisca_core/entities openfisca_core/holders + openfisca_core/indexed_enums openfisca_core/periods openfisca_core/projectors max-line-length = 88 From cd6ca32964fbdb2fd501f6e8d92e639c3cf35a30 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 03:37:01 +0200 Subject: [PATCH 06/10] fix(enums): failing doctest when AttributeError (#1267) --- openfisca_core/indexed_enums/_enum_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 0b2f26b352..777d611ba5 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -50,7 +50,7 @@ class EnumType(t.EnumType): >>> Enum.items Traceback (most recent call last): - AttributeError: type object 'Enum' has no attribute 'items' + AttributeError: ... >>> class Housing(Enum): ... OWNER = "Owner" From 38f99e0cd11170431fdaad144935a5959bcd47d4 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 03:54:43 +0200 Subject: [PATCH 07/10] fix(enums): ensure __eq__ gives a numpy array (#1267) --- openfisca_core/indexed_enums/enum_array.py | 6 ++++-- openfisca_core/indexed_enums/types.py | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 06fc1fbc93..2e9ebf1483 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -141,8 +141,10 @@ def __eq__(self, other: object) -> bool: """ if other.__class__.__name__ is self.possible_values.__name__: return self.view(numpy.ndarray) == other.index - - return self.view(numpy.ndarray) == other + is_eq = self.view(numpy.ndarray) == other + if isinstance(is_eq, numpy.ndarray): + return is_eq + return numpy.array([is_eq], dtype=t.BoolDType) def __ne__(self, other: object) -> bool: """Inequality. diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index a16b037509..ffc2cc9f2a 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -4,6 +4,7 @@ from openfisca_core.types import ( Array, ArrayLike, + DTypeBool as BoolDType, DTypeEnum as EnumDType, DTypeGeneric as AnyDType, DTypeInt as IntDType, @@ -49,6 +50,7 @@ __all__ = [ "Array", "ArrayLike", + "BoolDType", "DTypeLike", "Enum", "EnumArray", From 8074af4bb88af3d25c6aea83fc53bc084dafeea0 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 04:22:03 +0200 Subject: [PATCH 08/10] fix(enums): fix __repr__ (#1267) --- openfisca_core/data_storage/on_disk_storage.py | 2 +- openfisca_core/indexed_enums/_enum_type.py | 4 ++-- openfisca_core/indexed_enums/enum.py | 15 +++++++++------ openfisca_core/indexed_enums/enum_array.py | 11 ++++++----- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index 3d0ef7fc13..99cfe56dd1 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -87,7 +87,7 @@ def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: ... storage = data_storage.OnDiskStorage(directory) ... storage.put(value, period) ... storage._decode_file(storage._files[period]) - EnumArray([]) + EnumArray(Housing.TENANT) """ enum = self._enums.get(file) diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 777d611ba5..0152595eac 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -57,7 +57,7 @@ class EnumType(t.EnumType): ... TENANT = "Tenant" >>> Housing.items - rec.array([(0, 'OWNER', ), ...]) + rec.array([(0, 'OWNER', Housing.OWNER), (1, 'TENANT', Housing.TENAN...) >>> Housing.indices array([0, 1], dtype=int16) @@ -66,7 +66,7 @@ class EnumType(t.EnumType): array(['OWNER', 'TENANT'], dtype='>> Housing.enums - array([, ], dtype...) + array([Housing.OWNER, Housing.TENANT], dtype=object) """ diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 069cc8fa98..938335bcb5 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -27,22 +27,22 @@ class Enum(t.Enum, metaclass=EnumType): "" >>> repr(Housing.TENANT) - "" + 'Housing.TENANT' >>> str(Housing.TENANT) 'Housing.TENANT' >>> dict([(Housing.TENANT, Housing.TENANT.value)]) - {: 'Tenant'} + {Housing.TENANT: 'Tenant'} >>> list(Housing) - [, , ...] + [Housing.OWNER, Housing.TENANT, Housing.FREE_LODGER, Housing.HOMELESS] >>> Housing["TENANT"] - + Housing.TENANT >>> Housing("Tenant") - + Housing.TENANT >>> Housing.TENANT in Housing True @@ -106,6 +106,9 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: """ self.index = len(self._member_names_) + def __repr__(self) -> str: + return f"{self.__class__.__name__}.{self.name}" + def __eq__(self, other: object) -> bool: if not isinstance(other, Enum): return NotImplemented @@ -158,7 +161,7 @@ def encode( >>> array = numpy.array([1]) >>> enum_array = enum.EnumArray(array, Housing) >>> Housing.encode(enum_array) - EnumArray([]) + EnumArray(Housing.TENANT) # Array of Enum diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 2e9ebf1483..807be2ec55 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -36,7 +36,7 @@ class EnumArray(t.EnumArray): "" >>> repr(enum_array) - "EnumArray([])" + 'EnumArray(Housing.TENANT)' >>> str(enum_array) "['TENANT']" @@ -55,14 +55,14 @@ class EnumArray(t.EnumArray): >>> enum_array = enum.EnumArray(list(Housing), Housing) >>> enum_array[Housing.TENANT.index] - + Housing.TENANT >>> class OccupancyStatus(variables.Variable): ... value_type = enum.Enum ... possible_values = Housing >>> enum.EnumArray(array, OccupancyStatus.possible_values) - EnumArray([]) + EnumArray(Housing.TENANT) .. _Subclassing ndarray: https://numpy.org/doc/stable/user/basics.subclassing.html @@ -229,7 +229,7 @@ def decode(self) -> numpy.object_: >>> array = numpy.array([1]) >>> enum_array = enum.EnumArray(array, Housing) >>> enum_array.decode() - array([], dtype=object) + array([Housing.TENANT], dtype=object) """ return numpy.select( @@ -264,7 +264,8 @@ def decode_to_str(self) -> numpy.str_: ) def __repr__(self) -> str: - return f"{self.__class__.__name__}({self.decode()!s})" + items = ", ".join(str(item) for item in self.decode()) + return f"{self.__class__.__name__}({items})" def __str__(self) -> str: return str(self.decode_to_str()) From 52592246e6e0824ab8f56e1659363379bb3ddf1d Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 04:31:41 +0200 Subject: [PATCH 09/10] test(enums): update str eq test (#1267) --- openfisca_core/indexed_enums/enum.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 938335bcb5..a291acbd6d 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -181,8 +181,8 @@ def encode( >>> array = numpy.array(["TENANT"]) >>> enum_array = Housing.encode(array) - >>> enum_array[0] == Housing.TENANT.index - True + >>> enum_array == Housing.TENANT + array([ True]) # Array of bytes From 6dfc93f4127943f7453bc2720f0f9ed23f9aad9d Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 03:10:16 +0200 Subject: [PATCH 10/10] chore: version bump (fixes #1267) --- CHANGELOG.md | 47 ++++++++++++++++++++++ openfisca_core/indexed_enums/_enum_type.py | 3 ++ setup.py | 2 +- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74f86b175a..e2a2f70cdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +## 42.1.0 [#1273](https://github.com/openfisca/openfisca-core/pull/1273) + +#### New features + +- Introduce `indexed_enums.EnumType` + - Allows for actually fancy indexing `indexed_enums.Enum` + +#### Technical changes + +- Fix doctests + - Now `pytest openfisca_core/indexed_enums` runs without errors +- Fix bug in `Enum.encode` when passing a scalar + - Still raises `TypeError` but with an explanation of why it fails +- Fix bug in `Enum.encode` when encoding values not present in the enum + - When encoding values not present in an enum, `Enum.encode` always encoded + the first item of the enum + - Now, it correctly encodes only the values requested that exist in the enum + +##### Before + +```python +from openfisca_core import indexed_enums as enum + +class TestEnum(enum.Enum): + ONE = "one" + TWO = "two" + +TestEnum.encode([2]) +# EnumArray([0]) +``` + +##### After + +```python +from openfisca_core import indexed_enums as enum + +class TestEnum(enum.Enum): + ONE = "one" + TWO = "two" + +TestEnum.encode([2]) +# EnumArray([]) + +TestEnum.encode([0,1,2,5]) +# EnumArray([ ]) +``` + ### 42.0.8 [#1272](https://github.com/openfisca/openfisca-core/pull/1272) #### Documentation diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 0152595eac..4208ab3ce7 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -111,3 +111,6 @@ def __new__( def __dir__(cls) -> list[str]: return sorted({"items", "indices", "names", "enums", *super().__dir__()}) + + +__all__ = ["EnumType"] diff --git a/setup.py b/setup.py index 491479ccb5..8cdaa61739 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( name="OpenFisca-Core", - version="42.0.8", + version="42.1.0", author="OpenFisca Team", author_email="contact@openfisca.org", classifiers=[