From b08e984a100ba2d8dd66691e31b23a984daedf64 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Thu, 2 Nov 2023 12:17:13 -0400 Subject: [PATCH 1/5] Add support for collections.abc.Collection type hints --- .../typehints/native_type_compatibility.py | 9 ++++ .../native_type_compatibility_test.py | 10 +++- .../python/apache_beam/typehints/typehints.py | 51 +++++++++++++++++++ .../apache_beam/typehints/typehints_test.py | 25 +++++++++ 4 files changed, 94 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility.py b/sdks/python/apache_beam/typehints/native_type_compatibility.py index 67cb2b8c3f03..b2960ba0c7b7 100644 --- a/sdks/python/apache_beam/typehints/native_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/native_type_compatibility.py @@ -48,6 +48,7 @@ _CONVERTED_COLLECTIONS = [ collections.abc.Set, collections.abc.MutableSet, + collections.abc.Collection, ] @@ -118,6 +119,10 @@ def _match_is_exactly_iterable(user_type): return getattr(user_type, '__origin__', None) is expected_origin +def _match_is_exactly_collection(user_type): + return getattr(user_type, '__origin__', None) is collections.abc.Collection + + def match_is_named_tuple(user_type): return ( _safe_issubclass(user_type, typing.Tuple) and @@ -322,6 +327,10 @@ def convert_to_beam_type(typ): match=_match_issubclass(typing.Iterator), arity=1, beam_type=typehints.Iterator), + _TypeMapEntry( + match=_match_is_exactly_collection, + arity=1, + beam_type=typehints.Collection), ] # Find the first matching entry. diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py index 9c2762dff710..2e6db6a7733c 100644 --- a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py +++ b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py @@ -189,7 +189,15 @@ def test_convert_to_beam_type_with_collections_types(self): ( 'enum mutable set', collections.abc.MutableSet[_TestEnum], - typehints.Set[_TestEnum]) + typehints.Set[_TestEnum]), + ( + 'collection enum', + collections.abc.Collection[_TestEnum], + typehints.Collection[_TestEnum]), + ( + 'collection of tuples', + collections.abc.Collection[tuple[str, int]], + typehints.Collection[typehints.Tuple[str, int]]), ] for test_case in test_cases: diff --git a/sdks/python/apache_beam/typehints/typehints.py b/sdks/python/apache_beam/typehints/typehints.py index 238bf8c321d6..2f031fee13cd 100644 --- a/sdks/python/apache_beam/typehints/typehints.py +++ b/sdks/python/apache_beam/typehints/typehints.py @@ -82,6 +82,7 @@ 'Dict', 'Set', 'FrozenSet', + 'Collection', 'Iterable', 'Iterator', 'Generator', @@ -1017,6 +1018,55 @@ def __getitem__(self, type_param): FrozenSetTypeConstraint = FrozenSetHint.FrozenSetTypeConstraint +class CollectionHint(CompositeTypeHint): + """ A Collection type-hint. + + Collection[X] defines a type-hint for a collection of homogenous types. 'X' + may be either a built-in Python type or another nested TypeConstraint. + + This represents a collections.abc.Collection type, which implements + __contains__, __iter__, and __len__. This acts as a parent type for + sets but has fewer guarantees for mixins. + """ + class CollectionTypeConstraint(SequenceTypeConstraint): + def __init__(self, type_param): + super().__init__(type_param, abc.Collection) + + def __repr__(self): + return 'Collection[%s]' % repr(self.inner_type) + + @staticmethod + def _is_subclass_constraint(sub): + return isinstance(sub, CollectionTypeConstraint) or isinstance( + sub, SetTypeConstraint) or isinstance(sub, FrozenSetTypeConstraint) + + def _consistent_with_check_(self, sub): + if self._is_subclass_constraint(sub): + return is_consistent_with(sub.inner_type, self.inner_type) + elif isinstance(sub, TupleConstraint): + if not sub.tuple_types: + # The empty tuple is consistent with Iterator[T] for any T. + return True + # Each element in the hetrogenious tuple must be consistent with + # the iterator type. + # E.g. Tuple[A, B] < Iterable[C] if A < C and B < C. + return all( + is_consistent_with(elem, self.inner_type) + for elem in sub.tuple_types) + elif not isinstance(sub, TypeConstraint): + if getattr(sub, '__origin__', None) is not None: + return issubclass(sub, abc.Collection) + return False + + def __getitem__(self, type_param): + validate_composite_type_param( + type_param, error_msg_prefix='Parameter to a Collection hint') + return self.CollectionTypeConstraint(type_param) + + +CollectionTypeConstraint = CollectionHint.CollectionTypeConstraint + + class IterableHint(CompositeTypeHint): """An Iterable type-hint. @@ -1187,6 +1237,7 @@ def __getitem__(self, type_params): Dict = DictHint() Set = SetHint() FrozenSet = FrozenSetHint() +Collection = CollectionHint() Iterable = IterableHint() Iterator = IteratorHint() Generator = GeneratorHint() diff --git a/sdks/python/apache_beam/typehints/typehints_test.py b/sdks/python/apache_beam/typehints/typehints_test.py index 7f8c322f9f40..07cf4e85fc00 100644 --- a/sdks/python/apache_beam/typehints/typehints_test.py +++ b/sdks/python/apache_beam/typehints/typehints_test.py @@ -865,6 +865,31 @@ class FrozenSetHintTestCase(BaseSetHintTest.CommonTests): string_type = 'FrozenSet' +class CollectionHintTestCase(TypeHintTestCase): + def test_type_constraint_compatibility(self): + self.assertCompatible(typehints.Collection[int], typehints.Set[int]) + self.assertCompatible(typehints.Iterable[int], typehints.Collection[int]) + self.assertCompatible(typehints.Collection[int], typehints.FrozenSet[int]) + self.assertCompatible( + typehints.Collection[typehints.Any], typehints.Collection[int]) + self.assertCompatible(typehints.Collection[int], typehints.Tuple[int]) + + def test_one_way_compatibility(self): + self.assertNotCompatible(typehints.Set[int], typehints.Collection[int]) + self.assertNotCompatible( + typehints.FrozenSet[int], typehints.Collection[int]) + self.assertNotCompatible(typehints.Tuple[int], typehints.Collection[int]) + + def test_getitem_invalid_composite_type_param(self): + with self.assertRaises(TypeError) as e: + typehints.Collection[5] + self.assertEqual( + 'Parameter to a Collection hint must be a ' + 'non-sequence, a type, or a TypeConstraint. 5 is ' + 'an instance of int.', + e.exception.args[0]) + + class IterableHintTestCase(TypeHintTestCase): def test_getitem_invalid_composite_type_param(self): with self.assertRaises(TypeError) as e: From c7f0b972b08b8d472b0acecdb3117abd2a34ff07 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Thu, 2 Nov 2023 12:22:10 -0400 Subject: [PATCH 2/5] Extra test cases --- sdks/python/apache_beam/typehints/typehints_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdks/python/apache_beam/typehints/typehints_test.py b/sdks/python/apache_beam/typehints/typehints_test.py index 07cf4e85fc00..d7e3832f7ec8 100644 --- a/sdks/python/apache_beam/typehints/typehints_test.py +++ b/sdks/python/apache_beam/typehints/typehints_test.py @@ -879,6 +879,7 @@ def test_one_way_compatibility(self): self.assertNotCompatible( typehints.FrozenSet[int], typehints.Collection[int]) self.assertNotCompatible(typehints.Tuple[int], typehints.Collection[int]) + self.assertNotCompatible(typehints.Collection[int], typehints.Iterable[int]) def test_getitem_invalid_composite_type_param(self): with self.assertRaises(TypeError) as e: @@ -918,6 +919,7 @@ def test_compatibility(self): self.assertCompatible( typehints.Iterable[typehints.Any], typehints.List[typehints.Tuple[int, bool]]) + self.assertCompatible(typehints.Iterable[int], typehints.Collection[int]) def test_tuple_compatibility(self): self.assertCompatible(typehints.Iterable[int], typehints.Tuple[int, ...]) From a1f920a3cbd8e12ae4af53e56507653aa262738c Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Thu, 2 Nov 2023 13:57:10 -0400 Subject: [PATCH 3/5] Merge subclass helper (linting) --- sdks/python/apache_beam/typehints/typehints.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/typehints/typehints.py b/sdks/python/apache_beam/typehints/typehints.py index 2f031fee13cd..c7da528c0b4f 100644 --- a/sdks/python/apache_beam/typehints/typehints.py +++ b/sdks/python/apache_beam/typehints/typehints.py @@ -1037,8 +1037,11 @@ def __repr__(self): @staticmethod def _is_subclass_constraint(sub): - return isinstance(sub, CollectionTypeConstraint) or isinstance( - sub, SetTypeConstraint) or isinstance(sub, FrozenSetTypeConstraint) + return isinstance( + sub, ( + CollectionTypeConstraint, + FrozenSetTypeConstraint, + SetTypeConstraint)) def _consistent_with_check_(self, sub): if self._is_subclass_constraint(sub): From dac4061ca3f9f0f5093c42bb8d018ba20d84b0ea Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Mon, 6 Nov 2023 14:49:53 -0500 Subject: [PATCH 4/5] Change comment, refine general check --- sdks/python/apache_beam/typehints/typehints.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/typehints/typehints.py b/sdks/python/apache_beam/typehints/typehints.py index c7da528c0b4f..d18767d2847b 100644 --- a/sdks/python/apache_beam/typehints/typehints.py +++ b/sdks/python/apache_beam/typehints/typehints.py @@ -1051,14 +1051,16 @@ def _consistent_with_check_(self, sub): # The empty tuple is consistent with Iterator[T] for any T. return True # Each element in the hetrogenious tuple must be consistent with - # the iterator type. - # E.g. Tuple[A, B] < Iterable[C] if A < C and B < C. + # the collection type. + # E.g. Tuple[A, B] < Collection[C] if A < C and B < C. return all( is_consistent_with(elem, self.inner_type) for elem in sub.tuple_types) elif not isinstance(sub, TypeConstraint): - if getattr(sub, '__origin__', None) is not None: - return issubclass(sub, abc.Collection) + if getattr(sub, '__origin__', None) is not None and getattr( + sub, '__args__', None) is not None: + return issubclass(sub, abc.Collection) and is_consistent_with( + sub.__args__, self.inner_type) return False def __getitem__(self, type_param): From c2148fa8c9ac023eb8ffb67d69ced1ad21b25383 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Mon, 6 Nov 2023 16:43:30 -0500 Subject: [PATCH 5/5] Add extra test case, TODO --- sdks/python/apache_beam/typehints/typehints.py | 2 ++ sdks/python/apache_beam/typehints/typehints_test.py | 1 + 2 files changed, 3 insertions(+) diff --git a/sdks/python/apache_beam/typehints/typehints.py b/sdks/python/apache_beam/typehints/typehints.py index d18767d2847b..5726a8a8ca92 100644 --- a/sdks/python/apache_beam/typehints/typehints.py +++ b/sdks/python/apache_beam/typehints/typehints.py @@ -1043,6 +1043,8 @@ def _is_subclass_constraint(sub): FrozenSetTypeConstraint, SetTypeConstraint)) + # TODO(https://github.com/apache/beam/issues/29135): allow for consistency + # with Mapping types def _consistent_with_check_(self, sub): if self._is_subclass_constraint(sub): return is_consistent_with(sub.inner_type, self.inner_type) diff --git a/sdks/python/apache_beam/typehints/typehints_test.py b/sdks/python/apache_beam/typehints/typehints_test.py index d7e3832f7ec8..1d938edcc24b 100644 --- a/sdks/python/apache_beam/typehints/typehints_test.py +++ b/sdks/python/apache_beam/typehints/typehints_test.py @@ -873,6 +873,7 @@ def test_type_constraint_compatibility(self): self.assertCompatible( typehints.Collection[typehints.Any], typehints.Collection[int]) self.assertCompatible(typehints.Collection[int], typehints.Tuple[int]) + self.assertCompatible(typehints.Any, typehints.Collection[str]) def test_one_way_compatibility(self): self.assertNotCompatible(typehints.Set[int], typehints.Collection[int])