posit-dev · rich-iannone · Mar 3, 2025 · Mar 1, 2025 · Mar 1, 2025 · Mar 2, 2025
diff --git a/Makefile b/Makefile
@@ -7,7 +7,7 @@ test-update:
 	pytest --snapshot-update
 
 test-coverage:
-	pytest --cov=pointblank --cov-report=term
+	pytest --cov=pointblank --cov-report=term-missing
 
 check:
 	pyright --pythonversion 3.8 pointblank

diff --git a/pointblank/_interrogation.py b/pointblank/_interrogation.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 from dataclasses import dataclass
 
-from typing import Any
+from typing import Any, TYPE_CHECKING
 
 import narwhals as nw
 from narwhals.typing import FrameT
@@ -18,6 +18,8 @@
 from pointblank.column import Column, ColumnLiteral
 from pointblank.schema import Schema
 
+if TYPE_CHECKING:
+    from pointblank._typing import AbsoluteTolBounds
 
 @dataclass
 class Interrogator:
@@ -1893,16 +1895,23 @@ class RowCountMatch:
     count: int
     inverse: bool
     threshold: int
+    abs_tol_bounds : AbsoluteTolBounds
     tbl_type: str = "local"
 
     def __post_init__(self):
 
         from pointblank.validate import get_row_count
 
-        if not self.inverse:
-            res = get_row_count(data=self.data_tbl) == self.count
+        row_count :int = get_row_count(data=self.data_tbl)
+
+        lower_abs_limit, upper_abs_limit = self.abs_tol_bounds
+        min_val: int = self.count - lower_abs_limit
+        max_val: int = self.count + upper_abs_limit
+
+        if self.inverse:
+            res : bool = not (row_count >= min_val and row_count <= max_val)
         else:
-            res = get_row_count(data=self.data_tbl) != self.count
+            res : bool = row_count >= min_val and row_count <= max_val
 
         self.test_unit_res = res
 

diff --git a/pointblank/_typing.py b/pointblank/_typing.py
@@ -0,0 +1,11 @@
+
+from __future__ import annotations
+
+## Absolute bounds, ie. plus or minus
+type AbsoluteBounds = tuple[int, int]
+
+## Relative bounds, ie. plus or minus some percent
+type RelativeBounds = tuple[float, float]
+
+## Tolerance afforded to some check
+type Tolerance = int | float | AbsoluteBounds | RelativeBounds
diff --git a/pointblank/_utils.py b/pointblank/_utils.py
@@ -3,7 +3,7 @@
 import re
 import inspect
 
-from typing import Any
+from typing import Any, TYPE_CHECKING
 
 import narwhals as nw
 from narwhals.typing import FrameT
@@ -12,6 +12,25 @@
 
 from pointblank._constants import ASSERTION_TYPE_METHOD_MAP, GENERAL_COLUMN_TYPES
 
+if TYPE_CHECKING:
+    from pointblank._typing import AbsoluteBounds, Tolerance
+
+def _derive_single_bound(ref: int, tol : int | float) -> int:
+    """Derive a single bound using the reference."""
+    if not isinstance(tol, float | int):
+        raise TypeError("Tolerance must be a number or a tuple of numbers.")
+    if tol < 0:
+        raise ValueError("Tolerance must be non-negative.")
+    return int(tol * ref) if tol < 1 else int(tol)
+
+def _derive_bounds(ref: int, tol: Tolerance) -> AbsoluteBounds:
+    """Validate and extract the absolute bounds of the tolerance."""
+    if isinstance(tol, tuple):
+        return tuple(_derive_single_bound(ref, t) for t in tol)
+
+    bound = _derive_single_bound(ref, tol)
+    return bound, bound
+
 
 def _get_tbl_type(data: FrameT | Any) -> str:
 

diff --git a/pointblank/validate.py b/pointblank/validate.py
@@ -64,6 +64,7 @@
     _is_lib_present,
     _is_value_a_df,
     _select_df_lib,
+    _derive_bounds
 )
 from pointblank._utils_check_args import (
     _check_column,
@@ -75,6 +76,11 @@
 )
 from pointblank._utils_html import _create_table_type_html, _create_table_dims_html
 
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pointblank._typing import Tolerance, AbsoluteBounds
+
 __all__ = [
     "Validate",
     "load_dataset",
@@ -4551,6 +4557,7 @@ def col_schema_match(
     def row_count_match(
         self,
         count: int | FrameT | Any,
+        tol: Tolerance = 0,
         inverse: bool = False,
         pre: Callable | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds = None,
@@ -4575,6 +4582,14 @@ def row_count_match(
             The expected row count of the table. This can be an integer value, a Polars or Pandas
             DataFrame object, or an Ibis backend table. If a DataFrame/table is provided, the row
             count of that object will be used as the expected count.
+        tol
+            The tolerance allowable for the row count match. This can be specified as a single
+            numeric value (integer or float) or as a tuple of two integers representing the lower
+            and upper bounds of the tolerance range. If a single integer value (greater than 1) is
+            provided, it represents the absolute bounds of the tolerance, ie. plus or minus the value.
+            If a float value (between 0-1) is provided, it represents the relative tolerance, ie.
+            plus or minus the relative percentage of the target. If a tuple is provided, it represents
+            the lower and upper absolute bounds of the tolerance range. See the examples for more.
         inverse
             Should the validation step be inverted? If `True`, then the expectation is that the row
             count of the target table should not match the specified `count=` value.
@@ -4640,6 +4655,37 @@ def row_count_match(
 
         The validation table shows that the expectation value of `13` matches the actual count of
         rows in the target table. So, the single test unit passed.
+
+
+        Let's modify our example to show the different ways we can allow some tolerance to our validation
+        by using the `tol` argument.
+
+        ```{python}
+        smaller_small_table = small_table.sample(n = 12) # within the lower bound
+        validation = (
+            pb.Validate(data=smaller_small_table)
+            .row_count_match(count=13,tol=(2, 0)) # minus 2 but plus 0, ie. 11-13
+            .interrogate()
+        )
+
+        validation
+
+        validation = (
+            pb.Validate(data=smaller_small_table)
+            .row_count_match(count=13,tol=.05) # .05% tolerance of 13
+            .interrogate()
+        )
+
+        even_smaller_table = small_table.sample(n = 2)
+        validation = (
+            pb.Validate(data=even_smaller_table)
+            .row_count_match(count=13,tol=5) # plus or minus 5; this test will fail
+            .interrogate()
+        )
+
+        validation
+        ```
+
         """
 
         assertion_type = _get_fn_name()
@@ -4659,8 +4705,11 @@ def row_count_match(
         if _is_value_a_df(count) or "ibis.expr.types.relations.Table" in str(type(count)):
             count = get_row_count(count)
 
+        # Check the integrity of tolerance
+        bounds: AbsoluteBounds = _derive_bounds(ref = int(count), tol = tol)
+
         # Package up the `count=` and boolean params into a dictionary for later interrogation
-        values = {"count": count, "inverse": inverse}
+        values = {"count": count, "inverse": inverse, "abs_tol_bounds": bounds}
 
         val_info = _ValidationInfo(
             assertion_type=assertion_type,
@@ -5158,6 +5207,7 @@ def interrogate(
                     count=value["count"],
                     inverse=value["inverse"],
                     threshold=threshold,
+                    abs_tol_bounds = value["abs_tol_bounds"],
                     tbl_type=tbl_type,
                 ).get_test_results()
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -122,3 +122,8 @@ testpaths = [
 
 [tool.black]
 line-length = 100
+
+[tool.coverage.report]
+exclude_also = [
+    "if TYPE_CHECKING:"
+]
diff --git a/tests/test_validate.py b/tests/test_validate.py
@@ -1,10 +1,14 @@
+
+from __future__ import annotations
+
 import pathlib
 
 import pprint
 import sys
 import re
 from unittest.mock import patch
 import pytest
+import random
 import itertools
 from functools import partial
 import contextlib
@@ -47,6 +51,11 @@
     last_n,
 )
 
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Any
+
 
 TBL_LIST = [
     "tbl_pd",
@@ -2660,6 +2669,70 @@ def test_row_count_match(request, tbl_fixture):
 
     assert Validate(tbl).row_count_match(count=tbl).interrogate().n_passed(i=1, scalar=True) == 1
 
+@pytest.mark.parametrize(('val','e', 'exc'),
+                         [
+                             ((-1, 5), ValueError, 'Tolerance must be non-negative'),
+                             ([100, 5], TypeError, 'Tolerance must be a number or a tuple of numbers'),
+                             ((5, -1), ValueError, 'Tolerance must be non-negative'),
+                             ((None, .05), TypeError, 'Tolerance must be a number or a tuple of numbers'),
+                             (('fooval', 100), TypeError, 'Tolerance must be a number or a tuple of numbers'),
+                             (-1, ValueError, 'Tolerance must be non-negative'),
+                         ])
+def test_invalid_row_count_tol(val: Any, e: Exception, exc :str) -> None:
+    data = pl.DataFrame({"foocol": [1, 2, 3]})
+
+    with pytest.raises(expected_exception=e, match = exc):
+        Validate(data=data).row_count_match(count = 3, tol=val)
+
+
+def test_row_count_example_tol() -> None:
+    small_table = load_dataset("small_table")
+    smaller_small_table = small_table.sample(n = 12) # within the lower bound
+    (
+        Validate(data=smaller_small_table)
+        .row_count_match(count=13,tol=(2, 0)) # minus 2 but plus 0, ie. 11-13
+        .interrogate()
+        .assert_passing()
+    )
+
+    (
+        Validate(data=smaller_small_table)
+        .row_count_match(count=13,tol=.5) # .50% tolerance of 13
+        .interrogate()
+        .assert_passing()
+        )
+
+    even_smaller_table = small_table.sample(n = 2)
+    with pytest.raises(AssertionError):
+        (
+            Validate(data=even_smaller_table)
+            .row_count_match(count=13,tol=5) # plus or minus 5; this test will fail
+            .interrogate()
+            .assert_passing()
+        )
+
+test_row_count_example_tol()
+
+@pytest.mark.parametrize(('nrows','target_count','tol','should_pass'),
+                         [
+                            (98, 100, .05, True),
+                            (98, 100, 5, True),
+                            (104, 100, (5, 5), True),
+                            (0, 100, .05, False),
+                            (0, 100, 5, False),
+                            (0, 100, (5, 5), False),
+                            (98, 100, .95, True),
+                         ])
+def test_row_count_tol(nrows : int, target_count: int, tol : float | tuple[int, int], should_pass : bool) -> None:
+    data = pl.DataFrame({"foocol": [random.random()] * nrows})
+
+    catcher = contextlib.nullcontext if should_pass else partial(pytest.raises, AssertionError, match = "All tests did not pass")
+
+    with catcher():
+        Validate(data = data).row_count_match(
+            count = target_count,
+            tol = tol
+        ).interrogate().assert_passing()
 
 @pytest.mark.parametrize("tbl_fixture", TBL_LIST)
 def test_col_count_match(request, tbl_fixture):