Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 16 additions & 19 deletions dataframe_expectations/expectations/aggregation/unique.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,33 +24,30 @@

class ExpectationUniqueRows(DataFrameAggregationExpectation):
"""
Expectation that checks if there are no duplicate rows for the given column names. If the column_names list is empty, checks for duplicates across all columns.
Expectation that checks if there are no duplicate rows for the given column names.

For example:
For column_names ["col1", "col2"]
If the column_names list is empty, checks for duplicates across all columns.

Given the following DataFrame:
Example::

| col1 | col2 | col3 |
|------|------|------|
| 1 | 10 | 100 |
| 2 | 20 | 100 |
| 3 | 30 | 100 |
| 1 | 20 | 100 |
For column_names ["col1", "col2"], given a DataFrame with columns col1, col2, col3:

All rows are unique for columns ["col1", "col2"] and there will be no violations.
- If all rows have unique combinations of col1 and col2 values, there are no violations
- If any rows have identical col1 AND col2 values, those are violations

For the same column_names and the following DataFrame:
Example passing case (all unique)::

| col1 | col2 | col3 |
|------|------|------|
| 1 | 10 | 100 |
| 2 | 20 | 100 |
| 3 | 30 | 100 |
| 1 | 10 | 100 |
col1=1, col2=10
col1=2, col2=20
col1=3, col2=30
col1=1, col2=20 # Different col2, so unique

There will be 1 violation because the first and last rows are duplicates for columns ["col1", "col2"].
Example failing case (duplicate found)::

col1=1, col2=10
col1=2, col2=20
col1=3, col2=30
col1=1, col2=10 # Duplicate of first row
"""

def __init__(self, column_names: List[str], tags: Optional[List[str]] = None):
Expand Down
6 changes: 4 additions & 2 deletions dataframe_expectations/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,8 @@ def validate(self, func: Optional[Callable] = None, *, allow_none: bool = False)
by the decorated function. If validation fails, it raises
DataFrameExpectationsSuiteFailure.

Example:
Example::

runner = suite.build()

@runner.validate
Expand Down Expand Up @@ -439,7 +440,8 @@ class DataFrameExpectationsSuite:
Use this class to add expectations, then call build() to create an
immutable runner that can execute the expectations on DataFrames.

Example:
Example::

suite = DataFrameExpectationsSuite(suite_name="user_validation")
suite.expect_value_greater_than(
column_name="age",
Expand Down
6 changes: 4 additions & 2 deletions dataframe_expectations/suite.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ class DataFrameExpectationsSuiteRunner:
by the decorated function. If validation fails, it raises
DataFrameExpectationsSuiteFailure.

Example:
Example::

runner = suite.build()

@runner.validate
Expand Down Expand Up @@ -149,7 +150,8 @@ class DataFrameExpectationsSuite:
Use this class to add expectations, then call build() to create an
immutable runner that can execute the expectations on DataFrames.

Example:
Example::

suite = DataFrameExpectationsSuite(suite_name="user_validation")
suite.expect_value_greater_than(
column_name="age",
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
sphinx>=4.0.0
sphinx-rtd-theme>=1.0.0
sphinx-book-theme>=1.0.0
sphinx-autobuild>=2021.3.14
69 changes: 46 additions & 23 deletions docs/source/_ext/expectations_autodoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def run(self) -> List[Node]:
return nodes_list

def _generate_summary_table(self, expectations_by_category, method_details) -> List[Node]:
"""Generate summary table nodes."""
"""Generate summary as a two-level table"""
nodes_list = []

# Add section with title and proper ID
Expand All @@ -180,13 +180,17 @@ def _generate_summary_table(self, expectations_by_category, method_details) -> L

# Create table
table = nodes.table()
table['classes'] = ['summary-table']
tgroup = nodes.tgroup(cols=3)
table += tgroup

# Add column specifications
for width in [30, 25, 45]:
colspec = nodes.colspec(colwidth=width)
tgroup += colspec
colspec1 = nodes.colspec(colwidth=25)
colspec2 = nodes.colspec(colwidth=20)
colspec3 = nodes.colspec(colwidth=55)
tgroup += colspec1
tgroup += colspec2
tgroup += colspec3

# Add table head
thead = nodes.thead()
Expand All @@ -197,48 +201,67 @@ def _generate_summary_table(self, expectations_by_category, method_details) -> L

for header in ["Category", "Subcategory", "Expectations"]:
entry = nodes.entry()
entry['classes'] = ['summary-table-header']
row += entry
entry += nodes.paragraph("", header)
para = nodes.paragraph()
para += nodes.Text(header)
entry += para

# Add table body
tbody = nodes.tbody()
tgroup += tbody

for category in sorted(expectations_by_category.keys()):
for subcategory in sorted(expectations_by_category[category].keys()):
expectations = expectations_by_category[category][subcategory]
subcategories = expectations_by_category[category]

for idx, subcategory in enumerate(sorted(subcategories.keys())):
expectations = subcategories[subcategory]

row = nodes.row()
row['classes'] = ['summary-table-row']
tbody += row

# Category cell
# Category cell (only show on first subcategory)
entry = nodes.entry()
row += entry
entry += nodes.paragraph("", category)
if idx == 0:
entry['morerows'] = len(subcategories) - 1 # Span multiple rows
entry['classes'] = ['summary-category-cell']
para = nodes.paragraph()
para += nodes.Text(f"{category} ({sum(len(subcategories[s]) for s in subcategories)})")
entry += para
row += entry

# Subcategory cell
entry = nodes.entry()
entry['classes'] = ['summary-subcategory-cell']
para = nodes.paragraph()
para += nodes.Text(f"{subcategory} ({len(expectations)})")
entry += para
row += entry
entry += nodes.paragraph("", subcategory)

# Expectations cell
# Expectations cell with badges
entry = nodes.entry()
row += entry
entry['classes'] = ['summary-expectations-cell']

badges_container = nodes.container()
badges_container['classes'] = ['expectation-badges']

exp_para = nodes.paragraph()
for i, exp in enumerate(sorted(expectations)):
if i > 0:
exp_para += nodes.Text(", ")
for exp in sorted(expectations):
# Get description for tooltip
details = method_details[exp]
clean_docstring = clean_docstring_from_metadata(details["docstring"])
description = clean_docstring.split('\n')[0] if clean_docstring else ""

# Create clickable link to the card using raw HTML
raw_link = nodes.raw(
f'<a href="#card-{exp}" class="expectation-link">{exp}</a>',
f'<a href="#card-{exp}" class="expectation-link">{exp}</a>',
# Create badge with link
badge = nodes.raw(
f'<a href="#card-{exp}" class="expectation-badge" title="{description}">{exp}</a>',
f'<a href="#card-{exp}" class="expectation-badge" title="{description}">{exp}</a>',
format='html'
)
exp_para += raw_link
badges_container += badge

entry += exp_para
entry += badges_container
row += entry

summary_section += table
nodes_list.append(summary_section)
Expand Down
Loading