Skip to content

Commit

Permalink
[OPIK-41] Show the link to UI in pytest integration (#165)
Browse files Browse the repository at this point in the history
* Add summary printing to pytest integration

* Implement summary printing for the pytest integration and add protection to make sure Opik failures won't break pytest execution

* Justify right column to left side

* Add unit tests for convert_exception_to_log_message

* Fix lint errors

* Fix the way the UI URL is built
  • Loading branch information
alexkuzmik authored Sep 3, 2024
1 parent 8c0b1a8 commit 68f096c
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 13 deletions.
23 changes: 23 additions & 0 deletions sdks/python/src/opik/_logging.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Callable, Any
import functools
import logging

import sys
Expand Down Expand Up @@ -32,3 +34,24 @@ def setup() -> None:
root_level = min(root_level, file_handler.level)

opik_root_logger.setLevel(level=root_level)


def convert_exception_to_log_message(
    message: str,
    logger: logging.Logger,
    return_on_exception: Any = None,
    logging_level: int = logging.ERROR,
    **log_kwargs: Any,
) -> Callable:
    """
    Build a decorator that converts exceptions into log records.

    The decorated function is called as usual. If it raises an ``Exception``,
    the exception is not propagated: ``message`` is logged through ``logger``
    and ``return_on_exception`` is returned instead of the function result.

    Args:
        message: Text of the log record emitted when the call fails.
        logger: Logger whose ``log`` method receives the record.
        return_on_exception: Value returned in place of the function result
            when an exception was caught.
        logging_level: Level passed to ``logger.log``.
        **log_kwargs: Extra keyword arguments forwarded unchanged to
            ``logger.log`` (for example ``exc_info=True``).

    Returns:
        A decorator to apply to the function whose exceptions must be
        swallowed and logged.
    """

    def decorator(function: Callable) -> Callable:
        @functools.wraps(function)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            try:
                return function(*args, **kwargs)
            except Exception:
                # Only ``Exception`` subclasses are converted; anything that
                # derives solely from ``BaseException`` still propagates.
                logger.log(logging_level, message, **log_kwargs)
                return return_on_exception

        return wrapper

    return decorator
2 changes: 1 addition & 1 deletion sdks/python/src/opik/decorator/base_track_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def wrapper(*args, **kwargs) -> Any: # type: ignore
try:
result = func(*args, **kwargs)
except Exception as exception:
LOGGER.error(
LOGGER.debug(
logging_messages.EXCEPTION_RAISED_FROM_TRACKED_FUNCTION,
func.__name__,
(args, kwargs),
Expand Down
1 change: 0 additions & 1 deletion sdks/python/src/opik/plugins/pytest/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:

node_id: str = _get_test_nodeid()
test_runs_storage.LLM_UNIT_TEST_RUNS.add(node_id)
print(test_runs_storage.LLM_UNIT_TEST_RUNS)

test_run_content_ = _get_test_run_content(
func=func,
Expand Down
61 changes: 50 additions & 11 deletions sdks/python/src/opik/plugins/pytest/hooks.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,45 @@
import logging
from typing import List, Any, Generator
from opik import _logging
from typing import List, Any, Generator, TYPE_CHECKING
from opik.types import FeedbackScoreDict

from opik.api_objects import opik_client
from . import test_runs_storage, experiment_runner
from . import test_runs_storage, experiment_runner, summary

import pytest
from pytest import Session, TestReport, Item

if TYPE_CHECKING:
import _pytest.terminal

LOGGER = logging.getLogger(__name__)


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item: "pytest.Item") -> Generator:
    """
    Write the results of each test in the session.

    Runs as a hook wrapper: it yields first, then reads the report from the
    yielded outcome. The report of the "call" phase is stored on the test
    item as ``item.report``, which ``pytest_sessionfinish`` reads later.

    Any exception raised while reading or storing the report is logged at
    DEBUG level and suppressed instead of being raised out of the hook.
    """
    outcome = yield

    try:
        report = outcome.get_result()
        if report.when == "call":
            item.report = report
    except Exception:
        LOGGER.debug(
            "Unexpected failure during opik pytest_runtest_makereport hook",
            exc_info=True,
        )


def pytest_sessionfinish(session: "Session", exitstatus: Any) -> None:
llm_test_items: List["Item"] = [
@_logging.convert_exception_to_log_message(
"Unexpected failure during opik pytest_sessionfinish hook",
logger=LOGGER,
exc_info=True,
logging_level=logging.ERROR,
)
def pytest_sessionfinish(session: "pytest.Session", exitstatus: Any) -> None:
llm_test_items: List["pytest.Item"] = [
test_item
for test_item in session.items
if test_item.nodeid in test_runs_storage.LLM_UNIT_TEST_RUNS
Expand All @@ -36,7 +51,7 @@ def pytest_sessionfinish(session: "Session", exitstatus: Any) -> None:
traces_feedback_scores: List[FeedbackScoreDict] = []

for item in llm_test_items:
report: "TestReport" = item.report
report: "pytest.TestReport" = item.report
trace = test_runs_storage.TEST_RUNS_TRACES[item.nodeid]
traces_feedback_scores.append(
{"id": trace.id, "name": "Passed", "value": report.passed}
Expand All @@ -46,10 +61,34 @@ def pytest_sessionfinish(session: "Session", exitstatus: Any) -> None:
client.log_traces_feedback_scores(traces_feedback_scores)

experiment_runner.run(client=client, test_items=llm_test_items)

client.flush()
except Exception:
LOGGER.error(
"Unexpected exception occured while trying to log LLM unit tests experiment results",
exc_info=True,
)


@_logging.convert_exception_to_log_message(
    "Unexpected exception occured while trying to print LLM unit tests summary",
    logger=LOGGER,
    exc_info=True,
    logging_level=logging.DEBUG,
)
def pytest_terminal_summary(
    terminalreporter: "_pytest.terminal.TerminalReporter",
) -> None:
    """
    Print the Opik summary for the LLM unit tests of this session.

    Collects the "passed" and "failed" reports from the terminal reporter,
    keeps those whose node id is registered in
    ``test_runs_storage.LLM_UNIT_TEST_RUNS`` and hands them to
    ``summary.print``. Nothing is printed when no such report exists.

    Exceptions raised here are converted into a DEBUG log message by the
    decorator.
    """
    stats = terminalreporter.stats
    finished_reports: List[pytest.TestReport] = stats.get("passed", []) + stats.get(
        "failed", []
    )

    tracked_reports = [
        finished
        for finished in finished_reports
        if finished.nodeid in test_runs_storage.LLM_UNIT_TEST_RUNS
    ]

    if tracked_reports:
        summary.print(tracked_reports)
41 changes: 41 additions & 0 deletions sdks/python/src/opik/plugins/pytest/summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from typing import List

import opik
import pytest

import rich.console
import rich.panel
import rich.table
import rich.text

import opik.url_helpers


def print(reports: List[pytest.TestReport]) -> None:
    """
    Render a panel with the LLM test results to the console.

    The panel shows how many of the given reports passed and failed, the
    total number of reports, and the Opik UI URL returned by
    ``opik.url_helpers.get_ui_url``.

    Args:
        reports: Test reports to summarize.
    """
    ui_url = opik.url_helpers.get_ui_url()
    console = rich.console.Console()

    passed_count = sum(1 for report in reports if report.passed)
    failed_count = sum(1 for report in reports if report.failed)

    # Two header-less, left-justified columns: label and value.
    results = rich.table.Table(show_header=False, show_lines=False, padding=(0, 1))
    for _ in range(2):
        results.add_column(justify="left")

    results.add_row("Passed:", str(passed_count), style="green")
    results.add_row("Failed:", str(failed_count), style="red")
    results.add_row("Total:", str(len(reports)))
    results.add_row()  # empty spacer row before the link
    results.add_row("See the results:", f"[underline blue]{ui_url}[/]")

    results_panel = rich.panel.Panel(
        results,
        title="[bold]Opik: LLM Test Results[/bold]",
        title_align="left",
        expand=False,
    )

    console.print("\n")
    console.print(results_panel)
8 changes: 8 additions & 0 deletions sdks/python/src/opik/url_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import opik.config


def get_ui_url() -> str:
    """
    Return the UI URL derived from the configured ``url_override``.

    Reads ``url_override`` from a fresh ``opik.config.OpikConfig``, drops
    trailing slashes and then removes one trailing ``/api`` path segment if
    present. A URL that does not end with ``/api`` is returned with only its
    trailing slashes removed.
    """
    config = opik.config.OpikConfig()
    api_url = config.url_override.rstrip("/")

    # Remove the suffix explicitly: ``str.rstrip("/api")`` treats its argument
    # as a set of characters and would also eat trailing "a", "p", "i" and "/"
    # that belong to the host or path (e.g. "https://myopik.app/api").
    api_suffix = "/api"
    if api_url.endswith(api_suffix):
        return api_url[: -len(api_suffix)]

    return api_url
70 changes: 70 additions & 0 deletions sdks/python/tests/unit/test_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import mock
import logging
from opik import _logging


def test_convert_exception_to_log_message__happy_scenario_with_original_returned_value():
    """
    When the wrapped function succeeds, its return value is passed through
    and nothing is logged.
    """
    logger = mock.Mock(spec=["log"])
    function = mock.Mock(return_value="return value")

    decorate = _logging.convert_exception_to_log_message(
        "Error message",
        logger=logger,
        exception_info=True,
        logging_level="some-level",
    )
    decorated_function = decorate(function)

    result = decorated_function()

    assert result == "return value"
    function.assert_called_once()
    logger.log.assert_not_called()


def test_convert_exception_to_log_message__exception_raised__exception_converted_into_log_message__another_value_returned__log_is_called_with_passed_kwarg():
    """
    When the wrapped function raises, the fallback value is returned and the
    logger is called once with the given level, message and extra kwarg.
    """
    logger = mock.Mock(spec=["log"])
    function = mock.Mock(side_effect=Exception())

    decorate = _logging.convert_exception_to_log_message(
        "Error message",
        logger=logger,
        some_log_kwarg="some-log-kwarg",
        return_on_exception="return_value",
        logging_level="some-level",
    )
    decorated_function = decorate(function)

    result = decorated_function()

    assert result == "return_value"
    function.assert_called_once()
    logger.log.assert_called_once_with(
        "some-level",
        "Error message",
        some_log_kwarg="some-log-kwarg",
    )


def test_convert_exception_to_log_message__logging_level_not_set__error_level_used():
    """
    When no logging level is passed, the failure is logged with
    ``logging.ERROR``.
    """
    logger = mock.Mock(spec=["log"])
    function = mock.Mock(side_effect=Exception())

    decorate = _logging.convert_exception_to_log_message(
        "Error message",
        logger=logger,
        some_log_kwarg="some-log-kwarg",
        return_on_exception="return_value",
    )
    decorated_function = decorate(function)

    result = decorated_function()

    assert result == "return_value"
    function.assert_called_once()
    logger.log.assert_called_once_with(
        logging.ERROR, "Error message", some_log_kwarg="some-log-kwarg"
    )

0 comments on commit 68f096c

Please sign in to comment.