Skip to content

bpo-40755: Add missing multiset operations to Counter() #20339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
May 28, 2020
41 changes: 41 additions & 0 deletions Doc/library/collections.rst
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,47 @@ For example::
>>> sorted(c.elements())
['a', 'a', 'a', 'a', 'b', 'b']

.. method:: isdisjoint(other)

True if none of the elements in *self* overlap with those in *other*.
Negative or missing counts are ignored.
Logically equivalent to: ``not (+self) & (+other)``

.. versionadded:: 3.10

.. method:: isequal(other)

Test whether counts agree exactly.
Negative or missing counts are treated as zero.

This method works differently than the inherited :meth:`__eq__` method
which treats negative or missing counts as distinct from zero::

>>> Counter(a=1, b=0).isequal(Counter(a=1))
True
>>> Counter(a=1, b=0) == Counter(a=1)
False

Logically equivalent to: ``+self == +other``

.. versionadded:: 3.10

.. method:: issubset(other)

True if the counts in *self* are less than or equal to those in *other*.
Negative or missing counts are treated as zero.
Logically equivalent to: ``not self - (+other)``

.. versionadded:: 3.10

.. method:: issuperset(other)

True if the counts in *self* are greater than or equal to those in *other*.
Negative or missing counts are treated as zero.
Logically equivalent to: ``not other - (+self)``

.. versionadded:: 3.10

.. method:: most_common([n])

Return a list of the *n* most common elements and their counts from the
Expand Down
110 changes: 104 additions & 6 deletions Lib/collections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,12 +712,24 @@ def __repr__(self):
# To strip negative and zero counts, add-in an empty counter:
# c += Counter()
#
# Rich comparison operators for multiset subset and superset tests
# are deliberately omitted due to semantic conflicts with the
# existing inherited dict equality method. Subset and superset
# semantics ignore zero counts and require that p≤q ∧ p≥q → p=q;
# however, that would not be the case for p=Counter(a=1, b=0)
# and q=Counter(a=1) where the dictionaries are not equal.
# When the multiplicities are all zero or one, multiset operations
# are guaranteed to be equivalent to the corresponding operations
# for regular sets.
# Given counter multisets such as:
# cp = Counter(a=1, b=0, c=1)
# cq = Counter(c=1, d=0, e=1)
# The corresponding regular sets would be:
# sp = {'a', 'c'}
# sq = {'c', 'e'}
# All of the following relations would hold:
# set(cp + cq) == sp | sq
# set(cp - cq) == sp - sq
# set(cp | cq) == sp | sq
# set(cp & cq) == sp & sq
# cp.isequal(cq) == (sp == sq)
# cp.issubset(cq) == sp.issubset(sq)
# cp.issuperset(cq) == sp.issuperset(sq)
# cp.isdisjoint(cq) == sp.isdisjoint(sq)

def __add__(self, other):
'''Add counts from two counters.
Expand Down Expand Up @@ -876,6 +888,92 @@ def __iand__(self, other):
self[elem] = other_count
return self._keep_positive()

def isequal(self, other):
    '''Report whether two multisets hold the same counts.

    A count that is negative or absent is regarded as zero, so
    elements that merely appear with a non-positive count never
    cause a mismatch.  The inherited __eq__() method compares the
    underlying dictionaries instead and therefore distinguishes
    those cases:

    >>> Counter(a=1, b=0).isequal(Counter(a=1))
    True
    >>> Counter(a=1, b=0) == Counter(a=1)
    False

    *other* may be any object accepted by the Counter constructor.

    Logically equivalent to: +self == +other
    '''
    if not isinstance(other, Counter):
        other = Counter(other)
    # Visit every element known to either side; indexing a Counter
    # with an unknown element yields a zero count.
    for key in set(self) | set(other):
        mine, theirs = self[key], other[key]
        if mine == theirs:
            continue
        # The raw counts differ, but they still agree if both are
        # non-positive, so clamp negatives to zero and compare again.
        mine = 0 if mine < 0 else mine
        theirs = 0 if theirs < 0 else theirs
        if mine != theirs:
            return False
    return True

def issubset(self, other):
    '''Report whether every count in self is at most the count in other.

    A count that is negative or absent is regarded as zero.
    *other* may be any object accepted by the Counter constructor.

    Logically equivalent to: not self - (+other)
    '''
    if not isinstance(other, Counter):
        other = Counter(other)
    for key, mine in self.items():
        limit = other[key]
        # A negative count on the other side offers no more room
        # than a zero count does.
        limit = 0 if limit < 0 else limit
        if mine > limit:
            return False
    return True

def issuperset(self, other):
    '''Report whether every count in self is at least the count in other.

    A count that is negative or absent is regarded as zero.
    *other* may be any object accepted by the Counter constructor.

    Logically equivalent to: not other - (+self)
    '''
    # A superset test is the subset test with the operands swapped,
    # so coerce *other* to a Counter and delegate to its issubset().
    candidate = other if isinstance(other, Counter) else Counter(other)
    return candidate.issubset(self)

def isdisjoint(self, other):
    '''Report whether self and other share no element with a positive count.

    Elements whose count is negative or absent on either side are
    ignored.  *other* may be any object accepted by the Counter
    constructor.

    Logically equivalent to: not (+self) & (+other)
    '''
    if not isinstance(other, Counter):
        other = Counter(other)
    # The multisets overlap as soon as one element is present
    # (count above zero) on both sides.
    return not any(
        count > 0 and other[elem] > 0
        for elem, count in self.items()
    )

# Rich comparison operators for multiset subset and superset tests
# have been deliberately omitted due to semantic conflicts with the
# existing inherited dict equality method. Subset and superset
# semantics ignore zero counts and require that p⊆q ∧ p⊇q ⇔ p=q;
# however, that would not be the case for p=Counter(a=1, b=0)
# and q=Counter(a=1) where the dictionaries are not equal.

def _omitted(self, other):
    '''Reject an ordering comparison between counters.

    Bound to each of the ordering operators below so that using one
    of them always raises TypeError with a message pointing at the
    isequal(), issubset(), and issuperset() methods.
    '''
    raise TypeError(
        'Rich comparison operators have been deliberately omitted. '
        'Use the isequal(), issubset(), and issuperset() methods instead.')

# Bind every ordering operator exactly once (the original listed
# __lt__ twice in this chained assignment).
__lt__ = __le__ = __gt__ = __ge__ = _omitted


########################################################################
### ChainMap
Expand Down
59 changes: 59 additions & 0 deletions Lib/test/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import operator
import pickle
from random import choice, randrange
from itertools import product, chain, combinations
import string
import sys
from test import support
Expand Down Expand Up @@ -2219,6 +2220,64 @@ def test_helper_function(self):
self.assertTrue(c.called)
self.assertEqual(dict(c), {'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r':2 })

def test_multiset_operations_equivalent_to_set_operations(self):
    # With every multiplicity restricted to zero or one, each multiset
    # operation must agree with the matching operation on plain sets.

    # Every (letter, count) pair with a count of 0 or 1 ...
    pairs = list(product(('a', 'b', 'c'), range(2)))
    # ... and every subset of those pairs, from empty up to all of them.
    subsets = chain.from_iterable(
        combinations(pairs, size) for size in range(len(pairs) + 1))
    counters = [Counter(dict(subset)) for subset in subsets]

    for cp, cq in product(counters, repeat=2):
        # elements() skips non-positive counts, giving the plain-set view.
        sp, sq = set(cp.elements()), set(cq.elements())
        self.assertEqual(set(cp + cq), sp | sq)
        self.assertEqual(set(cp - cq), sp - sq)
        self.assertEqual(set(cp | cq), sp | sq)
        self.assertEqual(set(cp & cq), sp & sq)
        self.assertEqual(cp.isequal(cq), sp == sq)
        self.assertEqual(cp.issubset(cq), sp.issubset(sq))
        self.assertEqual(cp.issuperset(cq), sp.issuperset(sq))
        self.assertEqual(cp.isdisjoint(cq), sp.isdisjoint(sq))

def test_multiset_equal(self):
    # The explicit zero count for 'c' must not break equality.
    matching = Counter(a=3, b=2, c=0)
    self.assertTrue(matching.isequal('ababa'))
    # Same letters, but the multiplicities are swapped.
    swapped = Counter(a=3, b=2)
    self.assertFalse(swapped.isequal('babab'))

def test_multiset_subset(self):
    # Equal multisets are subsets of one another; the zero count is ignored.
    contained = Counter(a=3, b=2, c=0)
    self.assertTrue(contained.issubset('ababa'))
    # 'babab' has only two 'a's, one fewer than the counter holds.
    oversized = Counter(a=3, b=2)
    self.assertFalse(oversized.issubset('babab'))

def test_multiset_superset(self):
    # 'aab' fits inside the counter's counts for 'a' and 'b'.
    covering = Counter(a=3, b=2, c=0)
    self.assertTrue(covering.issuperset('aab'))
    # 'd' does not occur in the counter at all.
    lacking = Counter(a=3, b=2, c=0)
    self.assertFalse(lacking.issuperset('aabd'))

def test_multiset_disjoint(self):
    # 'c' is shared by name only: its count in the counter is zero.
    separate = Counter(a=3, b=2, c=0)
    self.assertTrue(separate.isdisjoint('cde'))
    # 'b' has a positive count on both sides.
    overlapping = Counter(a=3, b=2, c=0)
    self.assertFalse(overlapping.isdisjoint('bcd'))

def test_multiset_predicates_with_negative_counts(self):
# Multiset predicates behave as if run on the output of the elements()
# method, meaning that zero counts and negative counts are ignored.
# The tests below confirm that we get the same results as the
# tests above, even after a negative count has been included
# in either *self* or *other*.
#
# First group: *self* carries a negative count (d=-1) and *other*
# is a plain string.
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal('ababa'))
self.assertFalse(Counter(a=3, b=2, d=-1).isequal('babab'))
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset('ababa'))
self.assertFalse(Counter(a=3, b=2, d=-1).issubset('babab'))
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset('aab'))
self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset('aabd'))
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint('cde'))
self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint('bcd'))
# Second group: *other* is a Counter that also holds a negative count.
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal(Counter(a=3, b=2, c=-1)))
self.assertFalse(Counter(a=3, b=2, d=-1).isequal(Counter(a=2, b=3, c=-1)))
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset(Counter(a=3, b=2, c=-1)))
self.assertFalse(Counter(a=3, b=2, d=-1).issubset(Counter(a=2, b=3, c=-1)))
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1)))
self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1, d=1)))
self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(c=1, d=2, e=3, f=-1)))
self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(b=1, c=1, d=1, e=-1)))


################################################################################
### Run tests
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add multiset comparison methods to collections.Counter(): isequal(),
issubset(), issuperset(), and isdisjoint().