feat: ADK cli allows developers to create an eval set and add an eval case

ankursharmas · copybara-github · commit ae139bb461c2 · 2025-10-09T20:31:01.000-07:00
Agent developers can now create an eval set and add eval cases directly from the command line. Adding an eval case is currently limited to specifying conversation scenarios.

Sample commands:
- Create an eval set:
adk eval_set create \
    contributing/samples/hello_world \
    set_01

- Add an eval case with a scenarios file:
Content of scenarios.json file:
'{"scenarios": [{"starting_prompt": "hello", "conversation_plan": "world"}]}'

adk eval_set add_eval_case \
    contributing/samples/hello_world \
    set_01 \
    --scenarios_file scenarios.json

PiperOrigin-RevId: 817456117
diff --git a/src/google/adk/cli/cli_eval.py b/src/google/adk/cli/cli_eval.py
@@ -47,6 +47,7 @@
 from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
 from ..evaluation.eval_metrics import JudgeModelOptions
 from ..evaluation.eval_result import EvalCaseResult
+from ..evaluation.eval_sets_manager import EvalSetsManager
 from ..evaluation.evaluator import EvalStatus
 from ..evaluation.evaluator import Evaluator
 from ..sessions.base_session_service import BaseSessionService
@@ -436,3 +437,22 @@ def _get_evaluator(eval_metric: EvalMetric) -> Evaluator:
     return FinalResponseMatchV2Evaluator(eval_metric)
 
   raise ValueError(f"Unsupported eval metric: {eval_metric}")
+
+
+def get_eval_sets_manager(
+    eval_storage_uri: Optional[str], agents_dir: str
+) -> EvalSetsManager:
+  """Returns a GCS EvalSetsManager for a storage URI, else a local one."""
+  try:  # Imported here so missing eval deps surface as a ClickException.
+    from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
+    from .utils import evals
+  except ModuleNotFoundError as mnf:
+    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
+
+  if eval_storage_uri:
+    gcs_eval_managers = evals.create_gcs_eval_managers_from_uri(
+        eval_storage_uri
+    )
+    return gcs_eval_managers.eval_sets_manager
+  else:
+    return LocalEvalSetsManager(agents_dir=agents_dir)
diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py
@@ -18,6 +18,8 @@
 from contextlib import asynccontextmanager
 from datetime import datetime
 import functools
+import hashlib
+import json
 import logging
 import os
 from pathlib import Path
@@ -433,6 +435,28 @@ def cli_run(
   )
 
 
+def eval_options():
+  """Returns a decorator that adds the shared --eval_storage_uri option."""
+
+  def decorator(func):
+    @click.option(
+        "--eval_storage_uri",
+        type=str,
+        help=(
+            "Optional. The evals storage URI to store agent evals,"
+            " supported URIs: gs://<bucket name>."
+        ),
+        default=None,
+    )
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):  # Pure pass-through to the command.
+      return func(*args, **kwargs)
+
+    return wrapper
+
+  return decorator
+
+
 @main.command("eval", cls=HelpfulCommand)
 @click.argument(
     "agent_module_file_path",
@@ -449,15 +473,7 @@ def cli_run(
     default=False,
     help="Optional. Whether to print detailed results on console or not.",
 )
-@click.option(
-    "--eval_storage_uri",
-    type=str,
-    help=(
-        "Optional. The evals storage URI to store agent evals,"
-        " supported URIs: gs://<bucket name>."
-    ),
-    default=None,
-)
+@eval_options()
 def cli_eval(
     agent_module_file_path: str,
     eval_set_file_path_or_id: list[str],
@@ -675,6 +691,138 @@ def cli_eval(
       pretty_print_eval_result(eval_result)
 
 
+@main.group("eval_set")
+def eval_set():  # Parent group; subcommands attach via @eval_set.command.
+  """Manage Eval Sets."""
+  pass
+
+
+@eval_set.command("create", cls=HelpfulCommand)
+@click.argument(
+    "agent_module_file_path",
+    type=click.Path(
+        exists=True, dir_okay=True, file_okay=False, resolve_path=True
+    ),
+)
+@click.argument("eval_set_id", type=str, required=True)
+@eval_options()
+def cli_create_eval_set(
+    agent_module_file_path: str,
+    eval_set_id: str,
+    eval_storage_uri: Optional[str] = None,
+):
+  """Creates an empty EvalSet given the agent_module_file_path and eval_set_id."""
+  from .cli_eval import get_eval_sets_manager
+
+  app_name = os.path.basename(agent_module_file_path)  # Agent dir name.
+  agents_dir = os.path.dirname(agent_module_file_path)  # Its parent dir.
+  eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)
+
+  try:
+    eval_sets_manager.create_eval_set(
+        app_name=app_name, eval_set_id=eval_set_id
+    )
+    click.echo(f"Eval set '{eval_set_id}' created for app '{app_name}'.")
+  except ValueError as e:  # Reported as a CLI error; cause kept for debugging.
+    raise click.ClickException(str(e)) from e
+
+
+@eval_set.command("add_eval_case", cls=HelpfulCommand)
+@click.argument(
+    "agent_module_file_path",
+    type=click.Path(
+        exists=True, dir_okay=True, file_okay=False, resolve_path=True
+    ),
+)
+@click.argument("eval_set_id", type=str, required=True)
+@click.option(
+    "--scenarios_file",
+    type=click.Path(
+        exists=True, dir_okay=False, file_okay=True, resolve_path=True
+    ),
+    help="A path to file containing JSON serialized ConversationScenarios.",
+    required=True,
+)
+@click.option(
+    "--session_input_file",
+    type=click.Path(
+        exists=True, dir_okay=False, file_okay=True, resolve_path=True
+    ),
+    help=(
+        "Optional. Path to session file containing SessionInput in JSON format."
+    ),
+    default=None,
+)
+@eval_options()
+def cli_add_eval_case(
+    agent_module_file_path: str,
+    eval_set_id: str,
+    scenarios_file: str,
+    eval_storage_uri: Optional[str] = None,
+    session_input_file: Optional[str] = None,
+):
+  """Adds eval cases to the given eval set.
+
+  There are several ways that an eval case can be created, for now this method
+  only supports adding one using a conversation scenarios file.
+
+  If an eval case for the generated id already exists, then we skip adding it.
+  """
+  try:
+    from ..evaluation.conversation_scenarios import ConversationScenarios
+    from ..evaluation.eval_case import EvalCase
+    from ..evaluation.eval_case import SessionInput
+    from .cli_eval import get_eval_sets_manager
+  except ModuleNotFoundError as mnf:
+    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
+
+  app_name = os.path.basename(agent_module_file_path)  # Agent dir name.
+  agents_dir = os.path.dirname(agent_module_file_path)  # Its parent dir.
+  eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)
+
+  try:
+    session_input = None
+    if session_input_file:
+      with open(session_input_file, "r", encoding="utf-8") as f:
+        session_input = SessionInput.model_validate_json(f.read())
+
+    with open(scenarios_file, "r", encoding="utf-8") as f:
+      conversation_scenarios = ConversationScenarios.model_validate_json(
+          f.read()
+      )
+
+    for scenario in conversation_scenarios.scenarios:  # Id = content hash.
+      scenario_str = json.dumps(scenario.model_dump(), sort_keys=True)
+      eval_id = hashlib.sha256(scenario_str.encode("utf-8")).hexdigest()[:8]
+      eval_case = EvalCase(
+          eval_id=eval_id,
+          conversation_scenario=scenario,
+          session_input=session_input,
+          creation_timestamp=datetime.now().timestamp(),
+      )
+
+      if (
+          eval_sets_manager.get_eval_case(
+              app_name=app_name, eval_set_id=eval_set_id, eval_case_id=eval_id
+          )
+          is None
+      ):
+        eval_sets_manager.add_eval_case(
+            app_name=app_name, eval_set_id=eval_set_id, eval_case=eval_case
+        )
+        click.echo(
+            f"Eval case '{eval_case.eval_id}' added to eval set"
+            f" '{eval_set_id}'."
+        )
+      else:
+        click.echo(
+            f"Eval case '{eval_case.eval_id}' already exists in eval set"
+            f" '{eval_set_id}', skipped adding."
+        )
+  except Exception as e:  # CLI boundary: report any failure as a CLI error.
+    raise click.ClickException(f"Failed to add eval case(s): {e}") from e
+
+
 def web_options():
   """Decorator to add web UI options to click commands."""
 
diff --git a/tests/unittests/cli/utils/test_cli_eval.py b/tests/unittests/cli/utils/test_cli_eval.py
@@ -0,0 +1,51 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for utilities in cli_eval."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest import mock
+
+
+def test_get_eval_sets_manager_local(monkeypatch):
+  mock_local_manager = mock.MagicMock()
+  monkeypatch.setattr(
+      "google.adk.evaluation.local_eval_sets_manager.LocalEvalSetsManager",
+      lambda *a, **k: mock_local_manager,  # Fake constructor; ignores args.
+  )
+  from google.adk.cli.cli_eval import get_eval_sets_manager
+
+  manager = get_eval_sets_manager(eval_storage_uri=None, agents_dir="some/dir")
+  assert manager == mock_local_manager  # No URI -> the local manager is used.
+
+
+def test_get_eval_sets_manager_gcs(monkeypatch):
+  mock_gcs_manager = mock.MagicMock()
+  mock_create_gcs = mock.MagicMock()  # Stands in for the GCS factory.
+  mock_create_gcs.return_value = SimpleNamespace(
+      eval_sets_manager=mock_gcs_manager
+  )
+  monkeypatch.setattr(
+      "google.adk.cli.utils.evals.create_gcs_eval_managers_from_uri",
+      mock_create_gcs,
+  )
+  from google.adk.cli.cli_eval import get_eval_sets_manager
+
+  manager = get_eval_sets_manager(
+      eval_storage_uri="gs://bucket", agents_dir="some/dir"
+  )
+  assert manager == mock_gcs_manager  # URI given -> manager from the factory.
+  mock_create_gcs.assert_called_once_with("gs://bucket")
diff --git a/tests/unittests/cli/utils/test_cli_tools_click.py b/tests/unittests/cli/utils/test_cli_tools_click.py