1818from contextlib import asynccontextmanager
1919from datetime import datetime
2020import functools
21+ import hashlib
22+ import json
2123import logging
2224import os
2325from pathlib import Path
@@ -433,6 +435,28 @@ def cli_run(
433435 )
434436
435437
def eval_options():
  """Build a decorator that attaches the shared eval options to a command.

  Currently the only shared option is ``--eval_storage_uri``.

  Returns:
    A decorator that wraps the given function (preserving its metadata via
    ``functools.wraps``) and registers the click option on the wrapper.
  """

  def decorator(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
      # Pure pass-through: the wrapper only exists to carry the option.
      return func(*args, **kwargs)

    add_storage_uri_option = click.option(
        "--eval_storage_uri",
        type=str,
        help=(
            "Optional. The evals storage URI to store agent evals,"
            " supported URIs: gs://<bucket name>."
        ),
        default=None,
    )
    return add_storage_uri_option(wrapper)

  return decorator
458+
459+
436460@main .command ("eval" , cls = HelpfulCommand )
437461@click .argument (
438462 "agent_module_file_path" ,
@@ -449,15 +473,7 @@ def cli_run(
449473 default = False ,
450474 help = "Optional. Whether to print detailed results on console or not." ,
451475)
452- @click .option (
453- "--eval_storage_uri" ,
454- type = str ,
455- help = (
456- "Optional. The evals storage URI to store agent evals,"
457- " supported URIs: gs://<bucket name>."
458- ),
459- default = None ,
460- )
476+ @eval_options ()
461477def cli_eval (
462478 agent_module_file_path : str ,
463479 eval_set_file_path_or_id : list [str ],
@@ -675,6 +691,138 @@ def cli_eval(
675691 pretty_print_eval_result (eval_result )
676692
677693
# Command group for the `eval_set` sub-commands registered below. The
# docstring is kept verbatim because click shows it as the group's help text.
@main.group("eval_set")
def eval_set():
  """Manage Eval Sets."""
698+
699+
@eval_set.command("create", cls=HelpfulCommand)
@click.argument(
    "agent_module_file_path",
    type=click.Path(
        exists=True, dir_okay=True, file_okay=False, resolve_path=True
    ),
)
@click.argument("eval_set_id", type=str, required=True)
@eval_options()
def cli_create_eval_set(
    agent_module_file_path: str,
    eval_set_id: str,
    eval_storage_uri: Optional[str] = None,
):
  """Creates an empty EvalSet given the agent_module_file_path and eval_set_id."""
  # NOTE: the docstring above is left unchanged because click displays it as
  # the command's help text.
  #
  # Parameters:
  #   agent_module_file_path: existing directory of the agent module; click
  #     resolves it to an absolute path before this function runs.
  #   eval_set_id: id of the eval set to create.
  #   eval_storage_uri: optional storage URI forwarded to
  #     get_eval_sets_manager.
  #
  # Raises:
  #   click.ClickException: if the eval dependencies cannot be imported, or
  #     if the eval sets manager rejects the request with a ValueError.

  # Imported lazily and guarded the same way as in `add_eval_case`, so a
  # missing optional dependency surfaces as a readable CLI error instead of
  # a raw traceback.
  try:
    from .cli_eval import get_eval_sets_manager
  except ModuleNotFoundError as mnf:
    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf

  # The last path component names the app; its parent is the agents dir.
  app_name = os.path.basename(agent_module_file_path)
  agents_dir = os.path.dirname(agent_module_file_path)
  eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)

  try:
    eval_sets_manager.create_eval_set(
        app_name=app_name, eval_set_id=eval_set_id
    )
  except ValueError as e:
    # Keep the original exception chained for debugging.
    raise click.ClickException(str(e)) from e

  click.echo(f"Eval set '{eval_set_id}' created for app '{app_name}'.")
728+
729+
@eval_set.command("add_eval_case", cls=HelpfulCommand)
@click.argument(
    "agent_module_file_path",
    type=click.Path(
        exists=True, dir_okay=True, file_okay=False, resolve_path=True
    ),
)
@click.argument("eval_set_id", type=str, required=True)
@click.option(
    "--scenarios_file",
    type=click.Path(
        exists=True, dir_okay=False, file_okay=True, resolve_path=True
    ),
    help="A path to file containing JSON serialized ConversationScenarios.",
    required=True,
)
@click.option(
    "--session_input_file",
    type=click.Path(
        exists=True, dir_okay=False, file_okay=True, resolve_path=True
    ),
    help=(
        "Optional. Path to session file containing SessionInput in JSON format."
    ),
    default=None,
)
@eval_options()
def cli_add_eval_case(
    agent_module_file_path: str,
    eval_set_id: str,
    scenarios_file: str,
    eval_storage_uri: Optional[str] = None,
    session_input_file: Optional[str] = None,
):
  """Adds eval cases to the given eval set.

  There are several ways that an eval case can be created, for now this method
  only supports adding one using a conversation scenarios file.

  If an eval case for the generated id already exists, then we skip adding it.
  """
  # NOTE: the docstring above is left unchanged because click displays it as
  # the command's help text.
  #
  # Raises:
  #   click.ClickException: if the eval dependencies cannot be imported, or
  #     if reading/validating the input files or storing a case fails.
  try:
    from ..evaluation.conversation_scenarios import ConversationScenarios
    from ..evaluation.eval_case import EvalCase
    from ..evaluation.eval_case import SessionInput
    from .cli_eval import get_eval_sets_manager
  except ModuleNotFoundError as mnf:
    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf

  # The last path component names the app; its parent is the agents dir.
  app_name = os.path.basename(agent_module_file_path)
  agents_dir = os.path.dirname(agent_module_file_path)
  eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)

  try:
    # Read the JSON inputs as UTF-8 explicitly rather than relying on the
    # platform's default text encoding.
    session_input = None
    if session_input_file:
      with open(session_input_file, "r", encoding="utf-8") as f:
        session_input = SessionInput.model_validate_json(f.read())

    with open(scenarios_file, "r", encoding="utf-8") as f:
      conversation_scenarios = ConversationScenarios.model_validate_json(
          f.read()
      )

    for scenario in conversation_scenarios.scenarios:
      # The id is the first 8 hex chars of the SHA-256 of the scenario's
      # key-sorted JSON dump, so the same scenario content always maps to the
      # same id.
      scenario_str = json.dumps(scenario.model_dump(), sort_keys=True)
      eval_id = hashlib.sha256(scenario_str.encode("utf-8")).hexdigest()[:8]

      existing_case = eval_sets_manager.get_eval_case(
          app_name=app_name, eval_set_id=eval_set_id, eval_case_id=eval_id
      )
      if existing_case is not None:
        click.echo(
            f"Eval case '{eval_id}' already exists in eval set"
            f" '{eval_set_id}', skipped adding."
        )
        continue

      # Only build the EvalCase (and its timestamp) when it will be stored.
      eval_case = EvalCase(
          eval_id=eval_id,
          conversation_scenario=scenario,
          session_input=session_input,
          creation_timestamp=datetime.now().timestamp(),
      )
      eval_sets_manager.add_eval_case(
          app_name=app_name, eval_set_id=eval_set_id, eval_case=eval_case
      )
      click.echo(
          f"Eval case '{eval_case.eval_id}' added to eval set"
          f" '{eval_set_id}'."
      )
  except Exception as e:
    # CLI boundary: convert any failure into a click error, keeping the cause.
    raise click.ClickException(f"Failed to add eval case(s): {e}") from e
824+
825+
678826def web_options ():
679827 """Decorator to add web UI options to click commands."""
680828
0 commit comments