Skip to content

Commit ae139bb

Browse files
ankursharmas authored and copybara-github committed
feat: ADK cli allows developers to create an eval set and add an eval case
Agent developers can now create an eval set and add eval cases through the command line itself. Adding an eval case is limited only to specifying conversation scenarios. Sample commands: - Create an eval set: adk eval_set create \ contributing/samples/hello_world \ set_01 - Add an eval case with scenario file Content of scenarios.json file: '{"scenarios": [{"starting_prompt": "hello", "conversation_plan": "world"}]}' adk eval_set add_eval_case \ contributing/samples/hello_world \ set_01 \ --scenarios_file scenarios.json PiperOrigin-RevId: 817456117
1 parent 9939e0b commit ae139bb

File tree

4 files changed

+400
-9
lines changed

4 files changed

+400
-9
lines changed

src/google/adk/cli/cli_eval.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
4848
from ..evaluation.eval_metrics import JudgeModelOptions
4949
from ..evaluation.eval_result import EvalCaseResult
50+
from ..evaluation.eval_sets_manager import EvalSetsManager
5051
from ..evaluation.evaluator import EvalStatus
5152
from ..evaluation.evaluator import Evaluator
5253
from ..sessions.base_session_service import BaseSessionService
@@ -436,3 +437,22 @@ def _get_evaluator(eval_metric: EvalMetric) -> Evaluator:
436437
return FinalResponseMatchV2Evaluator(eval_metric)
437438

438439
raise ValueError(f"Unsupported eval metric: {eval_metric}")
440+
441+
442+
def get_eval_sets_manager(
    eval_storage_uri: Optional[str], agents_dir: str
) -> EvalSetsManager:
  """Builds the EvalSetsManager matching the requested storage location.

  Args:
    eval_storage_uri: Optional evals storage URI. When truthy, the manager is
      taken from `evals.create_gcs_eval_managers_from_uri(eval_storage_uri)`.
    agents_dir: Directory passed to `LocalEvalSetsManager` when no storage URI
      is given.

  Returns:
    The `eval_sets_manager` of the managers created from the URI when
    `eval_storage_uri` is set, otherwise a `LocalEvalSetsManager` for
    `agents_dir`.

  Raises:
    click.ClickException: If the modules needed here cannot be imported.
  """
  # Imported lazily so a ModuleNotFoundError is reported as a CLI error with
  # MISSING_EVAL_DEPENDENCIES_MESSAGE rather than as a raw traceback.
  try:
    from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
    from .utils import evals
  except ModuleNotFoundError as mnf:
    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf

  # No (or empty) URI: fall back to the local manager.
  if not eval_storage_uri:
    return LocalEvalSetsManager(agents_dir=agents_dir)

  return evals.create_gcs_eval_managers_from_uri(
      eval_storage_uri
  ).eval_sets_manager

src/google/adk/cli/cli_tools_click.py

Lines changed: 157 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
from contextlib import asynccontextmanager
1919
from datetime import datetime
2020
import functools
21+
import hashlib
22+
import json
2123
import logging
2224
import os
2325
from pathlib import Path
@@ -433,6 +435,28 @@ def cli_run(
433435
)
434436

435437

438+
def eval_options():
  """Returns a decorator that attaches the shared eval options to a command.

  Currently the only shared option is `--eval_storage_uri`, which is passed to
  the decorated function as the `eval_storage_uri` keyword argument.
  """
  storage_uri_option = click.option(
      "--eval_storage_uri",
      type=str,
      help=(
          "Optional. The evals storage URI to store agent evals,"
          " supported URIs: gs://<bucket name>."
      ),
      default=None,
  )

  def decorator(func):
    # Pass-through wrapper; functools.wraps keeps the metadata of `func`
    # (name, docstring) on the object that click sees.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
      return func(*args, **kwargs)

    # The option is applied after functools.wraps, i.e. on the wrapper.
    return storage_uri_option(wrapper)

  return decorator
458+
459+
436460
@main.command("eval", cls=HelpfulCommand)
437461
@click.argument(
438462
"agent_module_file_path",
@@ -449,15 +473,7 @@ def cli_run(
449473
default=False,
450474
help="Optional. Whether to print detailed results on console or not.",
451475
)
452-
@click.option(
453-
"--eval_storage_uri",
454-
type=str,
455-
help=(
456-
"Optional. The evals storage URI to store agent evals,"
457-
" supported URIs: gs://<bucket name>."
458-
),
459-
default=None,
460-
)
476+
@eval_options()
461477
def cli_eval(
462478
agent_module_file_path: str,
463479
eval_set_file_path_or_id: list[str],
@@ -675,6 +691,138 @@ def cli_eval(
675691
pretty_print_eval_result(eval_result)
676692

677693

694+
@main.group("eval_set")
def eval_set():
  """Manage Eval Sets."""
  # Intentionally empty: this function only serves as the click group to which
  # the `eval_set` sub-commands are attached.
698+
699+
700+
@eval_set.command("create", cls=HelpfulCommand)
@click.argument(
    "agent_module_file_path",
    type=click.Path(
        exists=True, dir_okay=True, file_okay=False, resolve_path=True
    ),
)
@click.argument("eval_set_id", type=str, required=True)
@eval_options()
def cli_create_eval_set(
    agent_module_file_path: str,
    eval_set_id: str,
    eval_storage_uri: Optional[str] = None,
):
  """Creates an empty EvalSet given the agent_module_file_path and eval_set_id."""
  # Parameters:
  #   agent_module_file_path: existing directory (resolved by click); its base
  #     name is used as the app name and its parent as the agents dir.
  #   eval_set_id: id of the eval set to create.
  #   eval_storage_uri: optional storage URI from `--eval_storage_uri`.
  # Raises click.ClickException when the eval modules cannot be imported or
  # when the manager rejects the request with a ValueError.

  # Guard the lazy import the same way `cli_add_eval_case` does, so a missing
  # module is reported with MISSING_EVAL_DEPENDENCIES_MESSAGE.
  try:
    from .cli_eval import get_eval_sets_manager
  except ModuleNotFoundError as mnf:
    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf

  app_name = os.path.basename(agent_module_file_path)
  agents_dir = os.path.dirname(agent_module_file_path)
  eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)

  # Keep the try body limited to the call whose ValueError is translated.
  try:
    eval_sets_manager.create_eval_set(
        app_name=app_name, eval_set_id=eval_set_id
    )
  except ValueError as e:
    # Chain the cause so the original error is not lost when debugging.
    raise click.ClickException(str(e)) from e

  click.echo(f"Eval set '{eval_set_id}' created for app '{app_name}'.")
728+
729+
730+
@eval_set.command("add_eval_case", cls=HelpfulCommand)
@click.argument(
    "agent_module_file_path",
    type=click.Path(
        exists=True, dir_okay=True, file_okay=False, resolve_path=True
    ),
)
@click.argument("eval_set_id", type=str, required=True)
@click.option(
    "--scenarios_file",
    type=click.Path(
        exists=True, dir_okay=False, file_okay=True, resolve_path=True
    ),
    help="A path to file containing JSON serialized ConversationScenarios.",
    required=True,
)
@click.option(
    "--session_input_file",
    type=click.Path(
        exists=True, dir_okay=False, file_okay=True, resolve_path=True
    ),
    help=(
        "Optional. Path to session file containing SessionInput in JSON format."
    ),
    default=None,
)
@eval_options()
def cli_add_eval_case(
    agent_module_file_path: str,
    eval_set_id: str,
    scenarios_file: str,
    eval_storage_uri: Optional[str] = None,
    session_input_file: Optional[str] = None,
):
  """Adds eval cases to the given eval set.

  There are several ways that an eval case can be created, for now this method
  only supports adding one using a conversation scenarios file.

  If an eval case for the generated id already exists, then we skip adding it.
  """
  # Parameters:
  #   agent_module_file_path: existing directory (resolved by click); its base
  #     name is used as the app name and its parent as the agents dir.
  #   eval_set_id: id of the eval set that receives the new cases.
  #   scenarios_file: JSON file parsed as ConversationScenarios.
  #   eval_storage_uri: optional storage URI from `--eval_storage_uri`.
  #   session_input_file: optional JSON file parsed as SessionInput; when
  #     given, the same SessionInput is attached to every created case.
  # Raises click.ClickException when the eval modules cannot be imported or
  # when reading, parsing or storing the cases fails.
  try:
    from ..evaluation.conversation_scenarios import ConversationScenarios
    from ..evaluation.eval_case import EvalCase
    from ..evaluation.eval_case import SessionInput
    from .cli_eval import get_eval_sets_manager
  except ModuleNotFoundError as mnf:
    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf

  app_name = os.path.basename(agent_module_file_path)
  agents_dir = os.path.dirname(agent_module_file_path)
  eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)

  try:
    session_input = None
    if session_input_file:
      # JSON is UTF-8 text; do not depend on the platform default encoding.
      with open(session_input_file, "r", encoding="utf-8") as f:
        session_input = SessionInput.model_validate_json(f.read())

    with open(scenarios_file, "r", encoding="utf-8") as f:
      conversation_scenarios = ConversationScenarios.model_validate_json(
          f.read()
      )

    for scenario in conversation_scenarios.scenarios:
      # The eval id is derived from the scenario content (first 8 hex chars of
      # the SHA-256 of its key-sorted JSON dump), so re-adding the same
      # scenario yields the same id and is detected as a duplicate below.
      scenario_str = json.dumps(scenario.model_dump(), sort_keys=True)
      eval_id = hashlib.sha256(scenario_str.encode("utf-8")).hexdigest()[:8]
      eval_case = EvalCase(
          eval_id=eval_id,
          conversation_scenario=scenario,
          session_input=session_input,
          creation_timestamp=datetime.now().timestamp(),
      )

      existing_case = eval_sets_manager.get_eval_case(
          app_name=app_name, eval_set_id=eval_set_id, eval_case_id=eval_id
      )
      if existing_case is not None:
        click.echo(
            f"Eval case '{eval_case.eval_id}' already exists in eval set"
            f" '{eval_set_id}', skipped adding."
        )
        continue

      eval_sets_manager.add_eval_case(
          app_name=app_name, eval_set_id=eval_set_id, eval_case=eval_case
      )
      click.echo(
          f"Eval case '{eval_case.eval_id}' added to eval set"
          f" '{eval_set_id}'."
      )
  except Exception as e:
    # CLI boundary: turn any failure into a user-facing error, keeping the
    # original exception chained as the cause.
    raise click.ClickException(f"Failed to add eval case(s): {e}") from e
824+
825+
678826
def web_options():
679827
"""Decorator to add web UI options to click commands."""
680828

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Unit tests for utilities in cli_eval."""
16+
17+
from __future__ import annotations
18+
19+
from types import SimpleNamespace
20+
from unittest import mock
21+
22+
23+
def test_get_eval_sets_manager_local(monkeypatch):
  """Without a storage URI, a LocalEvalSetsManager for agents_dir is returned."""
  mock_local_manager = mock.MagicMock()
  # A MagicMock stand-in (instead of a lambda) records the constructor call so
  # that the forwarding of `agents_dir` can be asserted below.
  mock_local_cls = mock.MagicMock(return_value=mock_local_manager)
  monkeypatch.setattr(
      "google.adk.evaluation.local_eval_sets_manager.LocalEvalSetsManager",
      mock_local_cls,
  )
  from google.adk.cli.cli_eval import get_eval_sets_manager

  manager = get_eval_sets_manager(eval_storage_uri=None, agents_dir="some/dir")
  assert manager == mock_local_manager
  mock_local_cls.assert_called_once_with(agents_dir="some/dir")
33+
34+
35+
def test_get_eval_sets_manager_gcs(monkeypatch):
  """With a storage URI, the manager built from that URI is returned."""
  expected_manager = mock.MagicMock()
  # Stand-in for the factory: returns an object exposing `eval_sets_manager`.
  fake_factory = mock.MagicMock(
      return_value=SimpleNamespace(eval_sets_manager=expected_manager)
  )
  monkeypatch.setattr(
      "google.adk.cli.utils.evals.create_gcs_eval_managers_from_uri",
      fake_factory,
  )
  from google.adk.cli.cli_eval import get_eval_sets_manager

  result = get_eval_sets_manager(
      eval_storage_uri="gs://bucket", agents_dir="some/dir"
  )

  assert result == expected_manager
  fake_factory.assert_called_once_with("gs://bucket")

0 commit comments

Comments
 (0)