Open
Description
import asyncio
import time
from pydantic_evals import Case, Dataset
# Build a five-case dataset: case_<n> feeds n in and expects 2 * n back.
dataset = Dataset(
    cases=[
        Case(name=f'case_{n}', inputs=n, expected_output=n * 2)
        for n in range(5)
    ],
)
async def double_number(input_value: int) -> int:
    """Return double the input after a 0.1-second simulated delay.

    Args:
        input_value: The integer to double.

    Returns:
        ``input_value * 2``.
    """
    await asyncio.sleep(0.1)  # Simulate work
    return input_value * 2
async def main():
    """Evaluate ``dataset`` with ``double_number`` twice and report timings.

    The first run calls ``dataset.evaluate`` with its defaults; the second
    passes ``max_concurrency=1``. After each run this prints whether the
    elapsed time fell on the expected side of a threshold, then prints the
    evaluation report. The bare triple-quoted strings after each
    ``report.print()`` show the expected printed output and are not executed
    for any effect.
    """
    # Run evaluation with unlimited concurrency
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    report_default = await dataset.evaluate(double_number)
    elapsed = time.perf_counter() - start
    print(f'Evaluation took less than 0.2s: {elapsed < 0.2}')
    #> Evaluation took less than 0.2s: True
    report_default.print()
    """
    Evaluation Summary:
    double_number
    ┏━━━━━━━━━━┳━━━━━━━━━━┓
    ┃ Case ID ┃ Duration ┃
    ┡━━━━━━━━━━╇━━━━━━━━━━┩
    │ case_0 │ 101.0ms │
    ├──────────┼──────────┤
    │ case_1 │ 101.0ms │
    ├──────────┼──────────┤
    │ case_2 │ 101.0ms │
    ├──────────┼──────────┤
    │ case_3 │ 101.0ms │
    ├──────────┼──────────┤
    │ case_4 │ 101.0ms │
    ├──────────┼──────────┤
    │ Averages │ 101.0ms │
    └──────────┴──────────┘
    """
    # Run evaluation with limited concurrency
    start = time.perf_counter()
    report_limited = await dataset.evaluate(double_number, max_concurrency=1)
    elapsed = time.perf_counter() - start
    print(f'Evaluation took more than 0.5s: {elapsed > 0.5}')
    #> Evaluation took more than 0.5s: True
    report_limited.print()
    """
    Evaluation Summary:
    double_number
    ┏━━━━━━━━━━┳━━━━━━━━━━┓
    ┃ Case ID ┃ Duration ┃
    ┡━━━━━━━━━━╇━━━━━━━━━━┩
    │ case_0 │ 101.0ms │
    ├──────────┼──────────┤
    │ case_1 │ 101.0ms │
    ├──────────┼──────────┤
    │ case_2 │ 101.0ms │
    ├──────────┼──────────┤
    │ case_3 │ 101.0ms │
    ├──────────┼──────────┤
    │ case_4 │ 101.0ms │
    ├──────────┼──────────┤
    │ Averages │ 101.0ms │
    └──────────┴──────────┘
    """
Metadata
Metadata
Assignees
Labels
No labels