[OPIK-979] [SDK] Allow usage of Python Opik SDK Evaluation Metrics without Opik authentication settings (#1305)

* rename `check_for_misconfiguration` to `is_config_misconfigured` and add a `show error message` argument

* do not ask for Opik credentials when only an evaluation metric is used

* fix evaluation unit tests

* add an evaluation test with no Opik configured

* rename methods

* change/rename imports
japdubengsub authored Feb 18, 2025
1 parent 9600f66 commit a6398a4
Showing 9 changed files with 137 additions and 35 deletions.
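
In practical terms, this change lets an evaluation metric be instantiated and scored even when no Opik credentials are configured: the metric skips tracking instead of demanding `opik configure`. A minimal sketch of the resulting behavior, assuming the `Equals` heuristic metric and its `output`/`reference` arguments as used in the SDK's evaluation tests:

```python
from opik.evaluation.metrics import Equals

# No OPIK_API_KEY / OPIK_WORKSPACE in the environment.
# Before this commit the SDK logged a misconfiguration error and still tried
# to track the call; now the metric simply runs without tracking.
metric = Equals()
result = metric.score(output="Paris", reference="Paris")
print(result.value)  # 1.0
```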
2 changes: 1 addition & 1 deletion sdks/python/src/opik/api_objects/opik_client.py
@@ -70,7 +70,7 @@ def __init__(
url_override=host,
api_key=api_key,
)
config.check_for_misconfiguration(config_)
config.is_misconfigured(config_, show_misconfiguration_message=True)
self._config = config_

self._workspace: str = config_.workspace
58 changes: 38 additions & 20 deletions sdks/python/src/opik/config.py
@@ -252,7 +252,21 @@ def get_from_user_inputs(**user_inputs: Any) -> OpikConfig:
return OpikConfig(**cleaned_user_inputs)


def check_for_misconfiguration(config: OpikConfig) -> None:
def is_misconfigured(
config: OpikConfig,
show_misconfiguration_message: bool = False,
) -> bool:
"""
Determines if the provided Opik configuration is misconfigured and optionally displays
a corresponding error message.
Parameters:
config: The configuration object containing settings such as URL overrides, workspace, API key,
and tracking options to be validated for misconfiguration.
show_misconfiguration_message : A flag indicating whether to display detailed error messages if the configuration
is determined to be misconfigured. Defaults to False.
"""

cloud_installation = url_helpers.get_base_url(
config.url_override
) == url_helpers.get_base_url(OPIK_URL_CLOUD)
@@ -268,24 +282,28 @@ def check_for_misconfiguration(config: OpikConfig) -> None:
and (not api_key_configured or workspace_is_default)
and not tracking_disabled
):
print()
LOGGER.error(
"========================\n"
"The workspace and API key must be specified to log data to https://www.comet.com/opik.\n"
"You can use `opik configure` CLI command to configure your environment for logging.\n"
"See the configuration details in the docs: https://www.comet.com/docs/opik/tracing/sdk_configuration.\n"
"==============================\n"
)
return
if show_misconfiguration_message:
print()
LOGGER.error(
"========================\n"
"The workspace and API key must be specified to log data to https://www.comet.com/opik.\n"
"You can use `opik configure` CLI command to configure your environment for logging.\n"
"See the configuration details in the docs: https://www.comet.com/docs/opik/tracing/sdk_configuration.\n"
"==============================\n"
)
return True

if localhost_installation and not workspace_is_default and not tracking_disabled:
print()
LOGGER.error(
"========================\n"
"Open source installations do not support workspace specification. Only `default` is available.\n"
"See the configuration details in the docs: https://www.comet.com/docs/opik/tracing/sdk_configuration\n"
"If you need advanced workspace management - you may consider using our cloud offer (https://www.comet.com/site/pricing/)\n"
"or contact our team for purchasing and setting up a self-hosted installation.\n"
"==============================\n"
)
return
if show_misconfiguration_message:
print()
LOGGER.error(
"========================\n"
"Open source installations do not support workspace specification. Only `default` is available.\n"
"See the configuration details in the docs: https://www.comet.com/docs/opik/tracing/sdk_configuration\n"
"If you need advanced workspace management - you may consider using our cloud offer (https://www.comet.com/site/pricing/)\n"
"or contact our team for purchasing and setting up a self-hosted installation.\n"
"==============================\n"
)
return True

return False
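
A minimal usage sketch of the refactored helper as it is now called from the two code paths touched by this commit: the client validates and reports, while callers that only need the boolean stay silent.

```python
from opik import config

config_ = config.OpikConfig()

# Client path: validate and print the detailed error message if needed.
if config.is_misconfigured(config_, show_misconfiguration_message=True):
    ...  # logging to the Opik backend will not work with this configuration

# Metric path: silent check, used to decide whether to wrap score() with track().
tracking_possible = not config.is_misconfigured(config_)
```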
13 changes: 8 additions & 5 deletions sdks/python/src/opik/evaluation/metrics/base_metric.py
@@ -1,8 +1,9 @@
import abc
from typing import Any, Union, List
from typing import Any, List, Union

import opik
from opik import config as opik_config
from ..metrics import score_result
from opik import track as track_decorator


class BaseMetric(abc.ABC):
@@ -36,9 +37,11 @@ def __init__(self, name: str, track: bool = True) -> None:
self.name = name
self.track = track

if track:
self.score = track_decorator(name=self.name)(self.score) # type: ignore
self.ascore = track_decorator(name=self.name)(self.ascore) # type: ignore
config = opik_config.OpikConfig()

if track and opik_config.is_misconfigured(config) is False:
self.score = opik.track(name=self.name)(self.score) # type: ignore
self.ascore = opik.track(name=self.name)(self.ascore) # type: ignore

@abc.abstractmethod
def score(
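
With this guard in place, a custom metric can be defined and scored without any Opik credentials; a minimal sketch, assuming `ScoreResult(name=..., value=...)` matches the `score_result` module imported above:

```python
from typing import Any

from opik.evaluation.metrics import base_metric, score_result


class ExactMatch(base_metric.BaseMetric):
    """Scores 1.0 when the output equals the reference, 0.0 otherwise."""

    def __init__(self, name: str = "exact_match", track: bool = True) -> None:
        super().__init__(name=name, track=track)

    def score(
        self, output: str, reference: str, **ignored_kwargs: Any
    ) -> score_result.ScoreResult:
        return score_result.ScoreResult(
            name=self.name,
            value=1.0 if output == reference else 0.0,
        )


# With no OPIK_API_KEY / OPIK_WORKSPACE configured, track=True no longer wraps
# score() with the track() decorator, so this runs without any backend.
print(ExactMatch().score(output="Paris", reference="Paris").value)  # 1.0
```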
(diff continues in another changed file; path not shown)
@@ -166,7 +166,10 @@ def generate_provider_response(
valid_litellm_params = self._remove_unnecessary_not_supported_params(kwargs)
all_kwargs = {**self._completion_kwargs, **valid_litellm_params}

if opik_monitor.enabled_in_config():
if (
opik_monitor.enabled_in_config()
and not opik_monitor.opik_is_misconfigured()
):
all_kwargs = opik_monitor.try_add_opik_monitoring_to_params(all_kwargs)

response = self._engine.completion(
(diff continues in another changed file; path not shown)
@@ -32,6 +32,12 @@ def enabled_in_config() -> bool:
return config_.enable_litellm_models_monitoring


@functools.lru_cache
def opik_is_misconfigured() -> bool:
config_ = config.OpikConfig()
return config.is_misconfigured(config_)


def _add_span_metadata_to_params(params: Dict[str, Any]) -> Dict[str, Any]:
current_span = opik_context.get_current_span_data()

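
The `functools.lru_cache` decorator on a zero-argument function means the configuration is read and validated only once per process rather than on every LLM call; a small self-contained illustration of that caching behavior:

```python
import functools


@functools.lru_cache
def expensive_check() -> bool:
    print("reading configuration ...")  # executed only on the first call
    return False


expensive_check()  # prints once and caches the result
expensive_check()  # returns the cached result; nothing is printed
```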
24 changes: 24 additions & 0 deletions sdks/python/tests/conftest.py
@@ -8,6 +8,7 @@
from opik import context_storage
from opik.api_objects import opik_client
from opik.message_processing import streamer_constructors
from . import testlib
from .testlib import backend_emulator_message_processor


@@ -115,3 +116,26 @@ def fake_backend_without_batching(patch_streamer_without_batching):

def random_chars(n: int = 6) -> str:
return "".join(random.choice(string.ascii_letters) for _ in range(n))


@pytest.fixture()
def configure_opik_local_env_vars():
with testlib.patch_environ(
{
"OPIK_URL_OVERRIDE": "http://localhost:5173/api",
}
):
yield


@pytest.fixture()
def configure_opik_not_configured():
with testlib.patch_environ(
add_keys={},
remove_keys=[
"OPIK_URL_OVERRIDE",
"OPIK_API_KEY",
"OPIK_WORKSPACE",
],
):
yield
(diff continues in another changed file; path not shown)
@@ -35,6 +35,31 @@ def test__answer_relevance(context):
assert_score_result(result)


@pytest.mark.parametrize(
argnames="context",
argvalues=[
None,
["France is a country in Europe."],
],
)
def test__no_opik_configured__answer_relevance(
context,
configure_opik_not_configured,
):
import os

os.environ["OPIK_DISABLE_LITELLM_MODELS_MONITORING"] = "True"
answer_relevance_metric = metrics.AnswerRelevance()

result = answer_relevance_metric.score(
input="What's the capital of France?",
output="The capital of France is Paris.",
context=context,
)

assert_score_result(result)


@pytest.mark.parametrize(
argnames="context",
argvalues=[
19 changes: 15 additions & 4 deletions sdks/python/tests/testlib/patch_helpers.py
@@ -1,15 +1,26 @@
import os
import contextlib
import os
from typing import Any, Dict, List


@contextlib.contextmanager
def patch_environ(env):
def patch_environ(
add_keys: Dict[str, Any],
remove_keys: List[str] = None,
):
"""
Temporarily set (or remove) environment variables inside the context manager and
fully restore the previous environment afterwards.
"""
original_env = {key: os.getenv(key) for key in env}
os.environ.update(env)
original_env = {key: os.getenv(key) for key in add_keys}

for key in remove_keys or []:
if key in os.environ:
original_env[key] = os.getenv(key)
del os.environ[key]

os.environ.update(add_keys)

try:
yield
finally:
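
A usage sketch of the extended helper, mirroring the `configure_opik_not_configured` fixture added in `conftest.py` above (the import path is assumed from the test package layout):

```python
import os

from testlib.patch_helpers import patch_environ  # assumed import path

# Simulate an environment with a local Opik URL and no credentials at all.
with patch_environ(
    add_keys={"OPIK_URL_OVERRIDE": "http://localhost:5173/api"},
    remove_keys=["OPIK_API_KEY", "OPIK_WORKSPACE"],
):
    assert os.getenv("OPIK_API_KEY") is None
# On exit, any previously set values are restored.
```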
20 changes: 16 additions & 4 deletions sdks/python/tests/unit/evaluation/test_evaluate.py
@@ -13,7 +13,10 @@
from ...testlib.models import FeedbackScoreModel, TraceModel


def test_evaluate_happyflow(fake_backend):
def test_evaluate__happyflow(
fake_backend,
configure_opik_local_env_vars,
):
mock_dataset = mock.MagicMock(spec=["__internal_api__get_items_as_dataclasses__"])
mock_dataset.name = "the-dataset-name"
mock_dataset.__internal_api__get_items_as_dataclasses__.return_value = [
@@ -218,7 +221,10 @@ def say_task(dataset_item: Dict[str, Any]):
assert_equal(expected_trace, actual_trace)


def test_evaluate_with_scoring_key_mapping(fake_backend):
def test_evaluate_with_scoring_key_mapping(
fake_backend,
configure_opik_local_env_vars,
):
mock_dataset = mock.MagicMock(spec=["__internal_api__get_items_as_dataclasses__"])
mock_dataset.name = "the-dataset-name"
mock_dataset.__internal_api__get_items_as_dataclasses__.return_value = [
@@ -439,7 +445,9 @@ def say_task(dataset_item: Dict[str, Any]):
assert_equal(expected_trace, actual_trace)


def test_evaluate___output_key_is_missing_in_task_output_dict__equals_metric_misses_output_argument__exception_raised():
def test_evaluate___output_key_is_missing_in_task_output_dict__equals_metric_misses_output_argument__exception_raised(
configure_opik_local_env_vars,
):
# Dataset is the only thing which is mocked for this test because
# evaluate should raise an exception right after the first attempt
# to compute Equals metric score.
@@ -487,6 +495,7 @@ def say_task(dataset_item: Dict[str, Any]):

def test_evaluate__exception_raised_from_the_task__error_info_added_to_the_trace(
fake_backend,
configure_opik_local_env_vars,
):
mock_dataset = mock.MagicMock(spec=["__internal_api__get_items_as_dataclasses__"])
mock_dataset.name = "the-dataset-name"
@@ -578,7 +587,10 @@ def say_task(dataset_item: Dict[str, Any]):
assert_equal(EXPECTED_TRACE_TREE, fake_backend.trace_trees[0])


def test_evaluate_prompt_happyflow(fake_backend):
def test_evaluate_prompt_happyflow(
fake_backend,
configure_opik_local_env_vars,
):
MODEL_NAME = "gpt-3.5-turbo"

mock_dataset = mock.MagicMock(spec=["__internal_api__get_items_as_dataclasses__"])
