13 changes: 13 additions & 0 deletions src/uipath/_cli/_evals/_models/_evaluation_set.py
@@ -44,6 +44,15 @@ class LLMMockingStrategy(BaseMockingStrategy):
)


class InputMockingStrategy(BaseModel):
prompt: str = Field(..., alias="prompt")
model: Optional[ModelSettings] = Field(None, alias="model")

model_config = ConfigDict(
validate_by_name=True, validate_by_alias=True, extra="allow"
)


class MockingArgument(BaseModel):
args: List[Any] = Field(default_factory=lambda: [], alias="args")
kwargs: Dict[str, Any] = Field(default_factory=lambda: {}, alias="kwargs")
@@ -110,6 +119,10 @@ class EvaluationItem(BaseModel):
default=None,
alias="mockingStrategy",
)
input_mocking_strategy: Optional[InputMockingStrategy] = Field(
default=None,
alias="inputMockingStrategy",
)


class EvaluationSet(BaseModel):
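For orientation, here is a minimal sketch (not part of this diff) of how the new strategy model might round-trip a payload; the prompt text is an invented placeholder:

from uipath._cli._evals._models._evaluation_set import InputMockingStrategy

# Hypothetical payload; "model" is optional and stays None when omitted.
strategy = InputMockingStrategy.model_validate(
    {"prompt": "Generate an invoice query for an amount under 500 EUR"}
)
assert strategy.model is None
# Serialize back using field aliases, dropping unset optional fields.
print(strategy.model_dump(by_alias=True, exclude_none=True))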
18 changes: 18 additions & 0 deletions src/uipath/_cli/_evals/_runtime.py
@@ -11,6 +11,10 @@
from opentelemetry.sdk.trace import ReadableSpan, Span
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult

from uipath._cli._evals.mocks.input_mocker import (
generate_llm_input,
)

from ..._events._event_bus import EventBus
from ..._events._events import (
EvalItemExceptionDetails,
@@ -318,6 +322,10 @@ async def _execute_eval(
evaluators: List[BaseEvaluator[Any]],
event_bus: EventBus,
) -> EvaluationRunResult:
# Generate LLM-based input if input_mocking_strategy is defined
if eval_item.input_mocking_strategy:
eval_item = await self._generate_input_for_eval(eval_item)

set_execution_context(eval_item, self.span_collector)

await event_bus.publish(
@@ -417,6 +425,16 @@ async def _execute_eval(

return evaluation_run_results

async def _generate_input_for_eval(
self, eval_item: EvaluationItem
) -> EvaluationItem:
"""Use LLM to generate a mock input for an evaluation item."""
# TODO(bai): get the input schema from agent definition, once it is available there.
input_schema: dict[str, Any] = {}
generated_input = await generate_llm_input(eval_item, input_schema)
updated_eval_item = eval_item.model_copy(update={"inputs": generated_input})
return updated_eval_item

def _get_and_clear_execution_data(
self, execution_id: str
) -> tuple[List[ReadableSpan], list[logging.LogRecord]]:
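As an aside, _generate_input_for_eval relies on Pydantic's model_copy(update=...) to swap in the generated inputs without mutating the original item. A small self-contained sketch of that pattern (an illustrative model, not the real EvaluationItem):

from typing import Any, Dict

from pydantic import BaseModel


class Item(BaseModel):
    inputs: Dict[str, Any] = {}
    name: str = "demo"


original = Item()
updated = original.model_copy(update={"inputs": {"query": "Calculate 5 times 7"}})
assert original.inputs == {}  # the original item is untouched
assert updated.inputs["query"] == "Calculate 5 times 7"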
111 changes: 111 additions & 0 deletions src/uipath/_cli/_evals/mocks/input_mocker.py
@@ -0,0 +1,111 @@
"""LLM Input Mocker implementation."""

import json
from datetime import datetime
from typing import Any, Dict

from uipath import UiPath
from uipath._cli._evals._models._evaluation_set import EvaluationItem
from uipath.tracing._traced import traced

from .mocker import UiPathInputMockingError


def get_input_mocking_prompt(
input_schema: str,
input_generation_instructions: str,
expected_behavior: str,
expected_output: str,
) -> str:
"""Generate the LLM input mocking prompt."""
current_datetime = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")

return f"""You are simulating input for automated testing purposes of an Agent as part of a simulation run.
You will need to generate realistic input to a LLM agent which will call various tools to achieve a goal. This must be in the exact format of the INPUT_SCHEMA.
You may need to follow specific INPUT_GENERATION_INSTRUCTIONS. If no relevant instructions are provided pertaining to input generation, use the other provided information and your own judgement to generate input.
If the INPUT_GENERATION_INSTRUCTIONS are provided, you MUST follow them exactly. For example if the instructions say to generate a value for a field to be before a certain calendar date, you must generate a value that is before that date.

The current date and time is: {current_datetime}

#INPUT_SCHEMA: Your output MUST conform EXACTLY to this JSON schema
{input_schema}
#END_INPUT_SCHEMA

#INPUT_GENERATION_INSTRUCTIONS
{input_generation_instructions}
#END_INPUT_GENERATION_INSTRUCTIONS

#EXPECTED_BEHAVIOR
{expected_behavior}
#END_EXPECTED_BEHAVIOR

#EXPECTED_OUTPUT
{expected_output}
#END_EXPECTED_OUTPUT

Based on the above information, provide a realistic input to the LLM agent. Your response should:
1. Match the expected input format according to the INPUT_SCHEMA exactly
2. Be consistent with the style and level of detail implied by the EXPECTED_BEHAVIOR and EXPECTED_OUTPUT
3. Consider the context of the agent being tested
4. Be realistic and representative of what a real user might say or ask

OUTPUT: ONLY the simulated agent input in the exact format of the INPUT_SCHEMA in valid JSON. Do not include any explanations, quotation marks, or markdown."""


@traced(name="__mocker__")
async def generate_llm_input(
evaluation_item: EvaluationItem,
input_schema: Dict[str, Any],
) -> Dict[str, Any]:
"""Generate synthetic input using an LLM based on the evaluation context."""
try:
llm = UiPath().llm

prompt = get_input_mocking_prompt(
input_schema=json.dumps(input_schema, indent=2),
input_generation_instructions=evaluation_item.input_mocking_strategy.prompt
if evaluation_item.input_mocking_strategy
else "",
expected_behavior=evaluation_item.expected_agent_behavior or "",
expected_output=json.dumps(evaluation_item.expected_output, indent=2)
if evaluation_item.expected_output
else "",
)

response_format = {
"type": "json_schema",
"json_schema": {
"name": "agent_input",
"strict": True,
"schema": input_schema,
},
}

model_parameters = (
evaluation_item.input_mocking_strategy.model
if evaluation_item.input_mocking_strategy
else None
)
completion_kwargs = (
model_parameters.model_dump(by_alias=False, exclude_none=True)
if model_parameters
else {}
)

response = await llm.chat_completions(
[{"role": "user", "content": prompt}],
response_format=response_format,
**completion_kwargs,
)

generated_input_str = response.choices[0].message.content

return json.loads(generated_input_str)
except json.JSONDecodeError as e:
raise UiPathInputMockingError(
f"Failed to parse LLM response as JSON: {str(e)}"
) from e
except UiPathInputMockingError:
raise
except Exception as e:
raise UiPathInputMockingError(f"Failed to generate input: {str(e)}") from e
6 changes: 6 additions & 0 deletions src/uipath/_cli/_evals/mocks/mocker.py
@@ -33,3 +33,9 @@ class UiPathMockResponseGenerationError(Exception):
"""Exception when a mocker is configured unable to generate a response."""

pass


class UiPathInputMockingError(Exception):
"""Exception when input mocking fails."""

pass
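A hedged sketch of how a caller might handle the new exception; the fallback to the item's existing inputs is an assumption for illustration, not behavior added by this PR:

from typing import Any, Dict

from uipath._cli._evals._models._evaluation_set import EvaluationItem
from uipath._cli._evals.mocks.input_mocker import generate_llm_input
from uipath._cli._evals.mocks.mocker import UiPathInputMockingError


async def generate_or_fallback(
    eval_item: EvaluationItem, input_schema: Dict[str, Any]
) -> Dict[str, Any]:
    try:
        return await generate_llm_input(eval_item, input_schema)
    except UiPathInputMockingError as exc:
        # Keep whatever inputs were already authored on the evaluation item.
        print(f"Input mocking failed, using original inputs: {exc}")
        return eval_item.inputs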
15 changes: 14 additions & 1 deletion src/uipath/agent/_utils.py
@@ -4,7 +4,10 @@
from httpx import Response
from pydantic import TypeAdapter

from uipath._cli._evals._models._evaluation_set import LLMMockingStrategy
from uipath._cli._evals._models._evaluation_set import (
InputMockingStrategy,
LLMMockingStrategy,
)
from uipath._cli._push.sw_file_handler import SwFileHandler
from uipath._cli._utils._studio_project import (
ProjectFile,
@@ -137,4 +140,14 @@ async def load_agent_definition(project_id: str) -> AgentDefinition:
evaluation.mocking_strategy = LLMMockingStrategy(
prompt=prompt, tools_to_simulate=tools_to_simulate
)

if not evaluation.input_mocking_strategy:
# Migrate lowCode input mocking fields
if evaluation.model_extra.get("simulateInput", False):
prompt = evaluation.model_extra.get(
"inputGenerationInstructions",
)
evaluation.input_mocking_strategy = InputMockingStrategy(
prompt=prompt
)
return agent_definition
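To make the lowCode migration above concrete, a small sketch with assumed legacy extra fields (the key names match what the code reads; the values are invented):

from uipath._cli._evals._models._evaluation_set import InputMockingStrategy

# Extra fields as they might appear on a legacy lowCode evaluation item.
legacy_extra = {
    "simulateInput": True,
    "inputGenerationInstructions": "Generate a refund request for a recent order",
}

if legacy_extra.get("simulateInput", False):
    migrated = InputMockingStrategy(
        prompt=legacy_extra.get("inputGenerationInstructions", "")
    )
    assert migrated.prompt.startswith("Generate a refund")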
106 changes: 106 additions & 0 deletions tests/cli/eval/mocks/test_input_mocker.py
@@ -0,0 +1,106 @@
from typing import Any

import pytest
from _pytest.monkeypatch import MonkeyPatch
from pytest_httpx import HTTPXMock

from uipath._cli._evals._models._evaluation_set import (
EvaluationItem,
InputMockingStrategy,
ModelSettings,
)
from uipath._cli._evals.mocks.input_mocker import generate_llm_input


@pytest.mark.asyncio
@pytest.mark.httpx_mock(assert_all_responses_were_requested=False)
async def test_generate_llm_input_with_model_settings(
httpx_mock: HTTPXMock, monkeypatch: MonkeyPatch
):
monkeypatch.setenv("UIPATH_URL", "https://example.com")
monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "test-token")

evaluation_item: dict[str, Any] = {
"id": "test-eval-id",
"name": "Test Input Generation",
"inputs": {},
"expectedOutput": {"result": 35},
"expectedAgentBehavior": "Agent should multiply the numbers",
"inputMockingStrategy": {
"prompt": "Generate a multiplication query with 5 and 7",
"model": {
"model": "gpt-4o-mini-2024-07-18",
"temperature": 0.5,
"maxTokens": 150,
},
},
"evalSetId": "test-eval-set-id",
"createdAt": "2025-09-04T18:54:58.378Z",
"updatedAt": "2025-09-04T18:55:55.416Z",
}
eval_item = EvaluationItem(**evaluation_item)

assert isinstance(eval_item.input_mocking_strategy, InputMockingStrategy)
assert isinstance(eval_item.input_mocking_strategy.model, ModelSettings)
assert eval_item.input_mocking_strategy.model.model == "gpt-4o-mini-2024-07-18"
assert eval_item.input_mocking_strategy.model.temperature == 0.5
assert eval_item.input_mocking_strategy.model.max_tokens == 150

input_schema = {
"type": "object",
"properties": {
"query": {"type": "string"},
},
"required": ["query"],
"additionalProperties": False,
}

httpx_mock.add_response(
url="https://example.com/agenthub_/llm/api/capabilities",
status_code=200,
json={},
)
httpx_mock.add_response(
url="https://example.com/orchestrator_/llm/api/capabilities",
status_code=200,
json={},
)

httpx_mock.add_response(
url="https://example.com/api/chat/completions?api-version=2024-08-01-preview",
status_code=200,
json={
"role": "assistant",
"id": "response-id",
"object": "chat.completion",
"created": 0,
"model": "gpt-4o-mini-2024-07-18",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": '{"query": "Calculate 5 times 7"}',
"tool_calls": None,
},
"finish_reason": "stop",
}
],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"total_tokens": 120,
},
},
)

result = await generate_llm_input(eval_item, input_schema)

# Verify the mocked input is correct
assert result == {"query": "Calculate 5 times 7"}

requests = httpx_mock.get_requests()
chat_completion_requests = [r for r in requests if "chat/completions" in str(r.url)]
assert len(chat_completion_requests) == 1, (
"Expected exactly one chat completion request"
)
1 change: 1 addition & 0 deletions tests/cli/eval/mocks/test_mocks.py
@@ -136,6 +136,7 @@ async def foofoo(*args, **kwargs):
assert await foo(x=2) == "bar1"


@pytest.mark.httpx_mock(assert_all_responses_were_requested=False)
def test_llm_mockable_sync(httpx_mock: HTTPXMock, monkeypatch: MonkeyPatch):
monkeypatch.setenv("UIPATH_URL", "https://example.com")
monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "1234567890")