|
18 | 18 |
|
19 | 19 | from pydantic_evals.evaluators import EvaluationReason, EvaluatorContext |
20 | 20 | from pydantic_evals.evaluators.common import ( |
21 | | - DEFAULT_EVALUATORS, |
22 | 21 | Contains, |
23 | 22 | Equals, |
24 | 23 | EqualsExpected, |
|
27 | 26 | LLMJudge, |
28 | 27 | MaxDuration, |
29 | 28 | OutputConfig, |
30 | | - Python, |
31 | 29 | ) |
32 | 30 | from pydantic_evals.otel._context_in_memory_span_exporter import context_subtree |
33 | 31 | from pydantic_evals.otel._errors import SpanTreeRecordingError |
@@ -395,68 +393,6 @@ async def test_llm_judge_evaluator_with_model_settings(mocker: MockerFixture): |
395 | 393 | ) |
396 | 394 |
|
397 | 395 |
|
398 | | -async def test_python(): |
399 | | - """Test Python evaluator.""" |
400 | | - evaluator = Python(expression='ctx.output > 0') |
401 | | - |
402 | | - # Test with valid expression |
403 | | - assert evaluator.evaluate(MockContext(output=42)) is True |
404 | | - assert evaluator.evaluate(MockContext(output=-1)) is False |
405 | | - |
406 | | - # Test with invalid expression |
407 | | - evaluator_invalid = Python(expression='invalid syntax') |
408 | | - with pytest.raises(SyntaxError): |
409 | | - evaluator_invalid.evaluate(MockContext(output=42)) |
410 | | - |
411 | | - |
412 | | -async def test_python_evaluator(): |
413 | | - """Test Python evaluator.""" |
414 | | - ctx = EvaluatorContext( |
415 | | - name='test', |
416 | | - inputs={'x': 42}, |
417 | | - metadata=None, |
418 | | - expected_output=None, |
419 | | - output={'y': 84}, |
420 | | - duration=0.0, |
421 | | - _span_tree=SpanTreeRecordingError('did not record spans'), |
422 | | - attributes={}, |
423 | | - metrics={}, |
424 | | - ) |
425 | | - |
426 | | - # Test simple expression |
427 | | - evaluator = Python(expression='ctx.output["y"] == 84') |
428 | | - assert evaluator.evaluate(ctx) is True |
429 | | - |
430 | | - # Test accessing inputs |
431 | | - evaluator = Python(expression='ctx.inputs["x"] * 2 == ctx.output["y"]') |
432 | | - assert evaluator.evaluate(ctx) is True |
433 | | - |
434 | | - # Test complex expression |
435 | | - evaluator = Python(expression='all(k in ctx.output for k in ["y"])') |
436 | | - assert evaluator.evaluate(ctx) is True |
437 | | - |
438 | | - # Test invalid expression |
439 | | - evaluator = Python(expression='invalid syntax') |
440 | | - with pytest.raises(SyntaxError): |
441 | | - evaluator.evaluate(ctx) |
442 | | - |
443 | | - # Test expression with undefined variables |
444 | | - evaluator = Python(expression='undefined_var') |
445 | | - with pytest.raises(NameError): |
446 | | - evaluator.evaluate(ctx) |
447 | | - |
448 | | - # Test expression with type error |
449 | | - evaluator = Python(expression='ctx.output + 1') # Can't add dict and int |
450 | | - with pytest.raises(TypeError): |
451 | | - evaluator.evaluate(ctx) |
452 | | - |
453 | | - |
454 | | -def test_default_evaluators(): |
455 | | - """Test DEFAULT_EVALUATORS tuple.""" |
456 | | - # Verify that Python evaluator is not included for security reasons |
457 | | - assert Python not in DEFAULT_EVALUATORS |
458 | | - |
459 | | - |
460 | 396 | async def test_span_query_evaluator(capfire: CaptureLogfire): |
461 | 397 | """Test HasMatchingSpan evaluator.""" |
462 | 398 | # Create a span tree with a known structure |
|
0 commit comments