
Commit 4948dfc

feat(InputMocking): generate mock input using LLM given instructions

1 parent a8fe199

File tree

  samples/calculator/evals/eval-sets/default.json
  src/uipath/_cli/_evals/_models/_evaluation_set.py
  src/uipath/_cli/_evals/_runtime.py
  src/uipath/_cli/_evals/mocks/input_mocker.py
  src/uipath/_cli/_evals/mocks/mocker.py
  src/uipath/_services/llm_gateway_service.py
  tests/sdk/services/test_llm_schema_cleanup.py

7 files changed: +236 −7 lines changed


samples/calculator/evals/eval-sets/default.json
Lines changed: 15 additions & 0 deletions

@@ -64,6 +64,21 @@
       "evalSetId": "default-eval-set-id",
       "createdAt": "2025-09-04T18:54:58.378Z",
       "updatedAt": "2025-09-04T18:55:55.416Z"
+    },
+    {
+      "id": "test-with-llm-input-mocking",
+      "name": "Test with LLM input mocking",
+      "inputs": {},
+      "expectedOutput": {
+        "result": 35
+      },
+      "expectedAgentBehavior": "",
+      "simulateInput": true,
+      "inputGenerationInstructions": "Generate a multiplication calculation where the first number is 5 and the second number is 7",
+      "simulationInstructions": "",
+      "evalSetId": "default-eval-set-id",
+      "createdAt": "2025-09-04T18:54:58.378Z",
+      "updatedAt": "2025-09-04T18:55:55.416Z"
     }
   ],
   "modelSettings": [],

src/uipath/_cli/_evals/_models/_evaluation_set.py
Lines changed: 5 additions & 0 deletions

@@ -103,6 +103,11 @@ class EvaluationItem(BaseModel):
     inputs: Dict[str, Any]
     expected_output: Dict[str, Any]
     expected_agent_behavior: str = Field(default="", alias="expectedAgentBehavior")
+    simulate_input: bool = Field(default=False, alias="simulateInput")
+    input_generation_instructions: str = Field(
+        default="", alias="inputGenerationInstructions"
+    )
+    simulation_instructions: str = Field(default="", alias="simulationInstructions")
     eval_set_id: str = Field(alias="evalSetId")
     created_at: str = Field(alias="createdAt")
     updated_at: str = Field(alias="updatedAt")
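As an illustration of the alias wiring (a minimal sketch, not the full EvaluationItem model), the camelCase keys stored in the eval-set JSON bind straight onto these snake_case fields:

    from pydantic import BaseModel, Field

    class SimulationFields(BaseModel):
        simulate_input: bool = Field(default=False, alias="simulateInput")
        input_generation_instructions: str = Field(default="", alias="inputGenerationInstructions")
        simulation_instructions: str = Field(default="", alias="simulationInstructions")

    # Keys arrive in camelCase, exactly as stored in default.json
    item = SimulationFields.model_validate({
        "simulateInput": True,
        "inputGenerationInstructions": "Generate a multiplication calculation ...",
    })
    assert item.simulate_input and item.simulation_instructions == ""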

src/uipath/_cli/_evals/_runtime.py
Lines changed: 22 additions & 0 deletions

@@ -11,6 +11,12 @@
 from opentelemetry.sdk.trace import ReadableSpan, Span
 from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult

+from uipath._cli._evals.mocks.input_mocker import (
+    extract_input_schema_from_entrypoint,
+    generate_llm_input,
+)
+from uipath._cli._evals.mocks.mocker import UiPathInputMockingError
+
 from ..._events._event_bus import EventBus
 from ..._events._events import (
     EvalItemExceptionDetails,

@@ -318,6 +324,10 @@ async def _execute_eval(
         evaluators: List[BaseEvaluator[Any]],
         event_bus: EventBus,
     ) -> EvaluationRunResult:
+        # Generate LLM-based input if simulate_input is enabled
+        if eval_item.simulate_input:
+            eval_item = await self._generate_input_for_eval(eval_item)
+
         set_execution_context(eval_item, self.span_collector)

         await event_bus.publish(

@@ -417,6 +427,18 @@ async def _execute_eval(

         return evaluation_run_results

+    async def _generate_input_for_eval(
+        self, eval_item: EvaluationItem
+    ) -> EvaluationItem:
+        """Use LLM to generate a mock input for an evaluation item."""
+        if not self.context.entrypoint:
+            raise UiPathInputMockingError("entrypoint must be provided for eval runs")
+
+        input_schema = extract_input_schema_from_entrypoint(self.context.entrypoint)
+        generated_input = await generate_llm_input(eval_item, input_schema)
+        updated_eval_item = eval_item.model_copy(update={"inputs": generated_input})
+        return updated_eval_item
+
     def _get_and_clear_execution_data(
         self, execution_id: str
     ) -> tuple[List[ReadableSpan], list[logging.LogRecord]]:
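The model_copy(update=...) call above returns a fresh EvaluationItem rather than mutating the one shared with the rest of the run. A self-contained sketch of that Pydantic behavior (field names are illustrative):

    from typing import Any, Dict
    from pydantic import BaseModel

    class Item(BaseModel):
        inputs: Dict[str, Any] = {}

    original = Item()
    updated = original.model_copy(update={"inputs": {"firstNumber": 5, "secondNumber": 7}})
    assert original.inputs == {}  # the original is left untouched
    assert updated.inputs["firstNumber"] == 5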
src/uipath/_cli/_evals/mocks/input_mocker.py
Lines changed: 149 additions & 0 deletions

@@ -0,0 +1,149 @@
+"""LLM Input Mocker implementation."""
+
+import importlib.util
+import inspect
+import json
+from datetime import datetime
+from typing import Any, Dict
+
+from pydantic import TypeAdapter
+
+from uipath import UiPath
+from uipath._cli._evals._models._evaluation_set import EvaluationItem
+from uipath._services.llm_gateway_service import _cleanup_schema
+from uipath.tracing._traced import traced
+
+from .mocker import UiPathInputMockingError
+
+
+def get_input_mocking_prompt(
+    input_schema: str,
+    test_run_proctor_instructions: str,
+    input_generation_instructions: str,
+    expected_behavior: str,
+    expected_output: str,
+) -> str:
+    """Generate the LLM input mocking prompt."""
+    current_datetime = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
+
+    return f"""You are simulating input for automated testing purposes of an Agent as part of a simulation run.
+You will need to generate realistic input to an LLM agent which will call various tools to achieve a goal. This must be in the exact format of the INPUT_SCHEMA.
+You may need to follow specific INPUT_GENERATION_INSTRUCTIONS. If no relevant instructions are provided pertaining to input generation, use the other provided information and your own judgement to generate input.
+If the INPUT_GENERATION_INSTRUCTIONS are provided, you MUST follow them exactly. For example, if the instructions say to generate a value for a field to be before a certain calendar date, you must generate a value that is before that date.
+The SIMULATION_INSTRUCTIONS will provide context around how the tools are being simulated.
+
+The current date and time is: {current_datetime}
+
+#INPUT_SCHEMA: You MUST OUTPUT THIS EXACT JSON SCHEMA
+{input_schema}
+#END_INPUT_SCHEMA
+
+#INPUT_GENERATION_INSTRUCTIONS
+{input_generation_instructions}
+#END_INPUT_GENERATION_INSTRUCTIONS
+
+#SIMULATION_INSTRUCTIONS
+{test_run_proctor_instructions}
+#END_SIMULATION_INSTRUCTIONS
+
+#EXPECTED_BEHAVIOR
+{expected_behavior}
+#END_EXPECTED_BEHAVIOR
+
+#EXPECTED_OUTPUT
+{expected_output}
+#END_EXPECTED_OUTPUT
+
+Based on the above information, provide a realistic input to the LLM agent. Your response should:
+1. Match the expected input format according to the INPUT_SCHEMA exactly
+2. Be consistent with the style and level of detail in the example inputs
+3. Consider the context of the agent being tested
+4. Be realistic and representative of what a real user might say or ask
+
+OUTPUT: ONLY the simulated agent input in the exact format of the INPUT_SCHEMA in valid JSON. Do not include any explanations, quotation marks, or markdown."""
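A hypothetical call for the calculator eval item defined in default.json above (the schema string here is abbreviated and illustrative):

    prompt = get_input_mocking_prompt(
        input_schema='{"type": "object", "properties": {"firstNumber": {"type": "number"}}}',
        test_run_proctor_instructions="",
        input_generation_instructions="Generate a multiplication calculation where the first number is 5 and the second number is 7",
        expected_behavior="",
        expected_output='{"result": 35}',
    )
    # The returned string embeds each argument between its #SECTION / #END_SECTION markers.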
+def extract_input_schema_from_entrypoint(entrypoint_path: str) -> Dict[str, Any]:
+    """Extract JSON schema from the entrypoint file's main function."""
+    spec = importlib.util.spec_from_file_location("entrypoint_module", entrypoint_path)
+    if not spec or not spec.loader:
+        raise UiPathInputMockingError(
+            f"Failed to load module spec from entrypoint: {entrypoint_path}"
+        )
+
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+
+    # Check for main, run, or execute (same as ScriptExecutor)
+    for func_name in ["main", "run", "execute"]:
+        func = getattr(module, func_name, None)
+        if func is not None:
+            sig = inspect.signature(func)
+            params = list(sig.parameters.values())
+
+            if not params:
+                continue
+
+            first_param = params[0]
+            if first_param.annotation == inspect.Parameter.empty:
+                continue
+
+            adapter = TypeAdapter(first_param.annotation)
+            return adapter.json_schema()
+
+    raise UiPathInputMockingError(
+        f"No suitable entrypoint (main, run, execute) with typed parameters found in {entrypoint_path}"
+    )
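For reference, a hypothetical entrypoint file this loader would accept: main's first parameter is annotated, so TypeAdapter can derive its JSON schema, while an untyped or zero-argument main/run/execute would be skipped:

    from pydantic import BaseModel, TypeAdapter

    class CalculatorInput(BaseModel):
        first_number: float
        second_number: float
        operation: str

    def main(payload: CalculatorInput) -> dict:
        ...

    # Equivalent to what extract_input_schema_from_entrypoint returns for this file:
    schema = TypeAdapter(CalculatorInput).json_schema()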
+@traced(name="__mocker__")
+async def generate_llm_input(
+    evaluation_item: EvaluationItem,
+    input_schema: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Generate synthetic input using an LLM based on the evaluation context."""
+    try:
+        llm = UiPath().llm
+
+        prompt = get_input_mocking_prompt(
+            input_schema=json.dumps(input_schema, indent=2),
+            test_run_proctor_instructions=evaluation_item.simulation_instructions or "",
+            input_generation_instructions=evaluation_item.input_generation_instructions
+            or "",
+            expected_behavior=evaluation_item.expected_agent_behavior or "",
+            expected_output=json.dumps(evaluation_item.expected_output, indent=2)
+            if evaluation_item.expected_output
+            else "",
+        )
+
+        cleaned_schema = _cleanup_schema(input_schema)
+
+        response_format = {
+            "type": "json_schema",
+            "json_schema": {
+                "name": "agent_input",
+                "strict": True,
+                "schema": cleaned_schema,
+            },
+        }
+
+        response = await llm.chat_completions(
+            [{"role": "user", "content": prompt}],
+            temperature=0.3,
+            response_format=response_format,
+        )
+
+        generated_input_str = response.choices[0].message.content
+        if not generated_input_str:
+            raise UiPathInputMockingError("LLM returned empty response")
+
+        return json.loads(generated_input_str)
+    except json.JSONDecodeError as e:
+        raise UiPathInputMockingError(
+            f"Failed to parse LLM response as JSON: {str(e)}"
+        ) from e
+    except UiPathInputMockingError:
+        raise
+    except Exception as e:
+        raise UiPathInputMockingError(f"Failed to generate input: {str(e)}") from e

src/uipath/_cli/_evals/mocks/mocker.py
Lines changed: 6 additions & 0 deletions

@@ -33,3 +33,9 @@ class UiPathMockResponseGenerationError(Exception):
     """Exception raised when a configured mocker is unable to generate a response."""

     pass
+
+
+class UiPathInputMockingError(Exception):
+    """Exception raised when input mocking fails."""
+
+    pass

src/uipath/_services/llm_gateway_service.py
Lines changed: 17 additions & 7 deletions

@@ -77,15 +77,18 @@ class EmbeddingModels(object):
     text_embedding_ada_002 = "text-embedding-ada-002"


-def _cleanup_schema(model_class: type[BaseModel]) -> Dict[str, Any]:
-    """Clean up a Pydantic model schema for use with LLM Gateway.
+def _cleanup_schema(
+    model_or_schema: Union[type[BaseModel], Dict[str, Any]],
+) -> Dict[str, Any]:
+    """Clean up a Pydantic model schema or raw JSON schema for use with LLM Gateway.

-    This function converts a Pydantic model's JSON schema to a format that's
-    compatible with the LLM Gateway's JSON schema requirements by removing
-    titles and other metadata that might cause validation issues.
+    This function converts a Pydantic model's JSON schema or a raw JSON schema dict
+    to a format that's compatible with the LLM Gateway's JSON schema requirements by
+    removing titles and other metadata that might cause validation issues.

     Args:
-        model_class (type[BaseModel]): A Pydantic BaseModel class to convert to schema.
+        model_or_schema (Union[type[BaseModel], Dict[str, Any]]): Either a Pydantic
+            BaseModel class or a JSON schema dictionary to clean.

     Returns:
         dict: A cleaned JSON schema dictionary suitable for LLM Gateway response_format.

@@ -102,9 +105,16 @@ class Country(BaseModel):
         schema = _cleanup_schema(Country)
         # Returns a clean schema without titles and unnecessary metadata
+
+        # Or pass a schema dict directly
+        schema_dict = {"type": "object", "properties": {...}}
+        schema = _cleanup_schema(schema_dict)
         ```
     """
-    schema = model_class.model_json_schema()
+    if isinstance(model_or_schema, dict):
+        schema = model_or_schema
+    else:
+        schema = model_or_schema.model_json_schema()

     def clean_type(type_def):
         """Clean property definitions by removing titles and cleaning nested items. Additionally, `additionalProperties` is ensured on all objects."""

tests/sdk/services/test_llm_schema_cleanup.py
Lines changed: 22 additions & 0 deletions

@@ -230,3 +230,25 @@ class ExtendedEntity(BaseEntity):
         assert "name" in required_fields
         # description is optional, so not in required
         assert "description" not in required_fields
+
+    def test_cleanup_with_schema_dict(self):
+        """Test that _cleanup_schema can accept a schema dict directly."""
+        schema_dict = {
+            "type": "object",
+            "title": "TestSchema",
+            "properties": {
+                "name": {"type": "string", "title": "Name"},
+                "age": {"type": "integer", "title": "Age"},
+            },
+            "required": ["name", "age"],
+        }
+
+        schema = _cleanup_schema(schema_dict)
+
+        assert schema["type"] == "object"
+        assert schema["additionalProperties"] is False
+        assert "title" not in schema
+        assert "name" in schema["properties"]
+        assert "age" in schema["properties"]
+        assert "title" not in schema["properties"]["name"]
+        assert "title" not in schema["properties"]["age"]
