[OPIK-979] [SDK] Allow usage of Python Opik SDK Evaluation Metrics without Opik authentication settings (#1305)

* rename `check_for_misconfiguration` to `is_config_misconfigured` and add a `show error message` argument

* do not ask for Opik credentials when only an evaluation metric is used

* fix evaluation unit tests

* add an evaluation test with no Opik configured

* rename methods

* change/rename imports
japdubengsub authored Feb 18, 2025
1 parent 9600f66 commit a6398a4
Showing 9 changed files with 137 additions and 35 deletions.
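
In practical terms, this change lets an evaluation metric be instantiated and scored even when no Opik credentials are configured: the metric skips tracking instead of demanding `opik configure`. A minimal sketch of the resulting behavior, assuming the `Equals` heuristic metric and its `output`/`reference` arguments as used in the SDK's evaluation tests:

```python
from opik.evaluation.metrics import Equals

# No OPIK_API_KEY / OPIK_WORKSPACE in the environment.
# Before this commit the SDK logged a misconfiguration error and still tried
# to track the call; now the metric simply runs without tracking.
metric = Equals()
result = metric.score(output="Paris", reference="Paris")
print(result.value)  # 1.0
```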
2 changes: 1 addition & 1 deletion sdks/python/src/opik/api_objects/opik_client.py
@@ -70,7 +70,7 @@ def __init__(
url_override=host,
api_key=api_key,
)
config.check_for_misconfiguration(config_)
config.is_misconfigured(config_, show_misconfiguration_message=True)
self._config = config_

self._workspace: str = config_.workspace
58 changes: 38 additions & 20 deletions sdks/python/src/opik/config.py
@@ -252,7 +252,21 @@ def get_from_user_inputs(**user_inputs: Any) -> OpikConfig:
return OpikConfig(**cleaned_user_inputs)


def check_for_misconfiguration(config: OpikConfig) -> None:
def is_misconfigured(
config: OpikConfig,
show_misconfiguration_message: bool = False,
) -> bool:
"""
Determines if the provided Opik configuration is misconfigured and optionally displays
a corresponding error message.
Parameters:
config: The configuration object containing settings such as URL overrides, workspace, API key,
and tracking options to be validated for misconfiguration.
show_misconfiguration_message : A flag indicating whether to display detailed error messages if the configuration
is determined to be misconfigured. Defaults to False.
"""

cloud_installation = url_helpers.get_base_url(
config.url_override
) == url_helpers.get_base_url(OPIK_URL_CLOUD)
@@ -268,24 +282,28 @@ def check_for_misconfiguration(config: OpikConfig) -> None:
and (not api_key_configured or workspace_is_default)
and not tracking_disabled
):
print()
LOGGER.error(
"========================\n"
"The workspace and API key must be specified to log data to https://www.comet.com/opik.\n"
"You can use `opik configure` CLI command to configure your environment for logging.\n"
"See the configuration details in the docs: https://www.comet.com/docs/opik/tracing/sdk_configuration.\n"
"==============================\n"
)
return
if show_misconfiguration_message:
print()
LOGGER.error(
"========================\n"
"The workspace and API key must be specified to log data to https://www.comet.com/opik.\n"
"You can use `opik configure` CLI command to configure your environment for logging.\n"
"See the configuration details in the docs: https://www.comet.com/docs/opik/tracing/sdk_configuration.\n"
"==============================\n"
)
return True

if localhost_installation and not workspace_is_default and not tracking_disabled:
print()
LOGGER.error(
"========================\n"
"Open source installations do not support workspace specification. Only `default` is available.\n"
"See the configuration details in the docs: https://www.comet.com/docs/opik/tracing/sdk_configuration\n"
"If you need advanced workspace management - you may consider using our cloud offer (https://www.comet.com/site/pricing/)\n"
"or contact our team for purchasing and setting up a self-hosted installation.\n"
"==============================\n"
)
return
if show_misconfiguration_message:
print()
LOGGER.error(
"========================\n"
"Open source installations do not support workspace specification. Only `default` is available.\n"
"See the configuration details in the docs: https://www.comet.com/docs/opik/tracing/sdk_configuration\n"
"If you need advanced workspace management - you may consider using our cloud offer (https://www.comet.com/site/pricing/)\n"
"or contact our team for purchasing and setting up a self-hosted installation.\n"
"==============================\n"
)
return True

return False
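
A minimal usage sketch of the refactored helper as it is now called from the two code paths touched by this commit: the client validates and reports, while callers that only need the boolean stay silent.

```python
from opik import config

config_ = config.OpikConfig()

# Client path: validate and print the detailed error message if needed.
if config.is_misconfigured(config_, show_misconfiguration_message=True):
    ...  # logging to the Opik backend will not work with this configuration

# Metric path: silent check, used to decide whether to wrap score() with track().
tracking_possible = not config.is_misconfigured(config_)
```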
13 changes: 8 additions & 5 deletions sdks/python/src/opik/evaluation/metrics/base_metric.py
@@ -1,8 +1,9 @@
import abc
from typing import Any, Union, List
from typing import Any, List, Union

import opik
from opik import config as opik_config
from ..metrics import score_result
from opik import track as track_decorator


class BaseMetric(abc.ABC):
@@ -36,9 +37,11 @@ def __init__(self, name: str, track: bool = True) -> None:
self.name = name
self.track = track

if track:
self.score = track_decorator(name=self.name)(self.score) # type: ignore
self.ascore = track_decorator(name=self.name)(self.ascore) # type: ignore
config = opik_config.OpikConfig()

if track and opik_config.is_misconfigured(config) is False:
self.score = opik.track(name=self.name)(self.score) # type: ignore
self.ascore = opik.track(name=self.name)(self.ascore) # type: ignore

@abc.abstractmethod
def score(
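
With this guard in place, a custom metric can be defined and scored without any Opik credentials; a minimal sketch, assuming `ScoreResult(name=..., value=...)` matches the `score_result` module imported above:

```python
from typing import Any

from opik.evaluation.metrics import base_metric, score_result


class ExactMatch(base_metric.BaseMetric):
    """Scores 1.0 when the output equals the reference, 0.0 otherwise."""

    def __init__(self, name: str = "exact_match", track: bool = True) -> None:
        super().__init__(name=name, track=track)

    def score(
        self, output: str, reference: str, **ignored_kwargs: Any
    ) -> score_result.ScoreResult:
        return score_result.ScoreResult(
            name=self.name,
            value=1.0 if output == reference else 0.0,
        )


# With no OPIK_API_KEY / OPIK_WORKSPACE configured, track=True no longer wraps
# score() with the track() decorator, so this runs without any backend.
print(ExactMatch().score(output="Paris", reference="Paris").value)  # 1.0
```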
(diff continues in another changed file; path not shown)
@@ -166,7 +166,10 @@ def generate_provider_response(
valid_litellm_params = self._remove_unnecessary_not_supported_params(kwargs)
all_kwargs = {**self._completion_kwargs, **valid_litellm_params}

if opik_monitor.enabled_in_config():
if (
opik_monitor.enabled_in_config()
and not opik_monitor.opik_is_misconfigured()
):
all_kwargs = opik_monitor.try_add_opik_monitoring_to_params(all_kwargs)

response = self._engine.completion(
(diff continues in another changed file; path not shown)
@@ -32,6 +32,12 @@ def enabled_in_config() -> bool:
return config_.enable_litellm_models_monitoring


@functools.lru_cache
def opik_is_misconfigured() -> bool:
config_ = config.OpikConfig()
return config.is_misconfigured(config_)


def _add_span_metadata_to_params(params: Dict[str, Any]) -> Dict[str, Any]:
current_span = opik_context.get_current_span_data()

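
The `functools.lru_cache` decorator on a zero-argument function means the configuration is read and validated only once per process rather than on every LLM call; a small self-contained illustration of that caching behavior:

```python
import functools


@functools.lru_cache
def expensive_check() -> bool:
    print("reading configuration ...")  # executed only on the first call
    return False


expensive_check()  # prints once and caches the result
expensive_check()  # returns the cached result; nothing is printed
```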
24 changes: 24 additions & 0 deletions sdks/python/tests/conftest.py
@@ -8,6 +8,7 @@
from opik import context_storage
from opik.api_objects import opik_client
from opik.message_processing import streamer_constructors
from . import testlib
from .testlib import backend_emulator_message_processor


@@ -115,3 +116,26 @@ def fake_backend_without_batching(patch_streamer_without_batching):

def random_chars(n: int = 6) -> str:
return "".join(random.choice(string.ascii_letters) for _ in range(n))


@pytest.fixture()
def configure_opik_local_env_vars():
with testlib.patch_environ(
{
"OPIK_URL_OVERRIDE": "http://localhost:5173/api",
}
):
yield


@pytest.fixture()
def configure_opik_not_configured():
with testlib.patch_environ(
add_keys={},
remove_keys=[
"OPIK_URL_OVERRIDE",
"OPIK_API_KEY",
"OPIK_WORKSPACE",
],
):
yield
(diff continues in another changed file; path not shown)
@@ -35,6 +35,31 @@ def test__answer_relevance(context):
assert_score_result(result)


@pytest.mark.parametrize(
argnames="context",
argvalues=[
None,
["France is a country in Europe."],
],
)
def test__no_opik_configured__answer_relevance(
context,
configure_opik_not_configured,
):
import os

os.environ["OPIK_DISABLE_LITELLM_MODELS_MONITORING"] = "True"
answer_relevance_metric = metrics.AnswerRelevance()

result = answer_relevance_metric.score(
input="What's the capital of France?",
output="The capital of France is Paris.",
context=context,
)

assert_score_result(result)


@pytest.mark.parametrize(
argnames="context",
argvalues=[
19 changes: 15 additions & 4 deletions sdks/python/tests/testlib/patch_helpers.py
@@ -1,15 +1,26 @@
import os
import contextlib
import os
from typing import Any, Dict, List


@contextlib.contextmanager
def patch_environ(env):
def patch_environ(
add_keys: Dict[str, Any],
remove_keys: List[str] = None,
):
"""
Temporarily set (or remove) environment variables inside the context manager and
fully restore the previous environment afterwards.
"""
original_env = {key: os.getenv(key) for key in env}
os.environ.update(env)
original_env = {key: os.getenv(key) for key in add_keys}

for key in remove_keys or []:
if key in os.environ:
original_env[key] = os.getenv(key)
del os.environ[key]

os.environ.update(add_keys)

try:
yield
finally:
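
A usage sketch of the extended helper, mirroring the `configure_opik_not_configured` fixture added in `conftest.py` above (the import path is assumed from the test package layout):

```python
import os

from testlib.patch_helpers import patch_environ  # assumed import path

# Simulate an environment with a local Opik URL and no credentials at all.
with patch_environ(
    add_keys={"OPIK_URL_OVERRIDE": "http://localhost:5173/api"},
    remove_keys=["OPIK_API_KEY", "OPIK_WORKSPACE"],
):
    assert os.getenv("OPIK_API_KEY") is None
# On exit, any previously set values are restored.
```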
20 changes: 16 additions & 4 deletions sdks/python/tests/unit/evaluation/test_evaluate.py
@@ -13,7 +13,10 @@
from ...testlib.models import FeedbackScoreModel, TraceModel


def test_evaluate_happyflow(fake_backend):
def test_evaluate__happyflow(
fake_backend,
configure_opik_local_env_vars,
):
mock_dataset = mock.MagicMock(spec=["__internal_api__get_items_as_dataclasses__"])
mock_dataset.name = "the-dataset-name"
mock_dataset.__internal_api__get_items_as_dataclasses__.return_value = [
@@ -218,7 +221,10 @@ def say_task(dataset_item: Dict[str, Any]):
assert_equal(expected_trace, actual_trace)


def test_evaluate_with_scoring_key_mapping(fake_backend):
def test_evaluate_with_scoring_key_mapping(
fake_backend,
configure_opik_local_env_vars,
):
mock_dataset = mock.MagicMock(spec=["__internal_api__get_items_as_dataclasses__"])
mock_dataset.name = "the-dataset-name"
mock_dataset.__internal_api__get_items_as_dataclasses__.return_value = [
@@ -439,7 +445,9 @@ def say_task(dataset_item: Dict[str, Any]):
assert_equal(expected_trace, actual_trace)


def test_evaluate___output_key_is_missing_in_task_output_dict__equals_metric_misses_output_argument__exception_raised():
def test_evaluate___output_key_is_missing_in_task_output_dict__equals_metric_misses_output_argument__exception_raised(
configure_opik_local_env_vars,
):
# Dataset is the only thing which is mocked for this test because
# evaluate should raise an exception right after the first attempt
# to compute Equals metric score.
@@ -487,6 +495,7 @@ def say_task(dataset_item: Dict[str, Any]):

def test_evaluate__exception_raised_from_the_task__error_info_added_to_the_trace(
fake_backend,
configure_opik_local_env_vars,
):
mock_dataset = mock.MagicMock(spec=["__internal_api__get_items_as_dataclasses__"])
mock_dataset.name = "the-dataset-name"
@@ -578,7 +587,10 @@ def say_task(dataset_item: Dict[str, Any]):
assert_equal(EXPECTED_TRACE_TREE, fake_backend.trace_trees[0])


def test_evaluate_prompt_happyflow(fake_backend):
def test_evaluate_prompt_happyflow(
fake_backend,
configure_opik_local_env_vars,
):
MODEL_NAME = "gpt-3.5-turbo"

mock_dataset = mock.MagicMock(spec=["__internal_api__get_items_as_dataclasses__"])
