feat(InputMocking): generate mock input using LLM given instructions #695
Merged
This PR adds an LLM-based input mocker for evaluations: given an evaluation item's input schema, optional input-generation instructions, expected agent behavior, and expected output, it prompts an LLM to generate realistic agent input that conforms to the schema. Two new files are added: the mocker implementation and a test.

`input_mocker.py` (`uipath/_cli/_evals/mocks/`, +111 lines):
```python
"""LLM Input Mocker implementation."""

import json
from datetime import datetime
from typing import Any, Dict

from uipath import UiPath
from uipath._cli._evals._models._evaluation_set import EvaluationItem
from uipath.tracing._traced import traced

from .mocker import UiPathInputMockingError


def get_input_mocking_prompt(
    input_schema: str,
    input_generation_instructions: str,
    expected_behavior: str,
    expected_output: str,
) -> str:
    """Generate the LLM input mocking prompt."""
    current_datetime = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")

    return f"""You are simulating input for automated testing purposes of an Agent as part of a simulation run.
You will need to generate realistic input to an LLM agent which will call various tools to achieve a goal. This must be in the exact format of the INPUT_SCHEMA.
You may need to follow specific INPUT_GENERATION_INSTRUCTIONS. If no relevant instructions are provided pertaining to input generation, use the other provided information and your own judgement to generate input.
If the INPUT_GENERATION_INSTRUCTIONS are provided, you MUST follow them exactly. For example, if the instructions say to generate a value for a field to be before a certain calendar date, you must generate a value that is before that date.

The current date and time is: {current_datetime}

#INPUT_SCHEMA: You MUST OUTPUT THIS EXACT JSON SCHEMA
{input_schema}
#END_INPUT_SCHEMA

#INPUT_GENERATION_INSTRUCTIONS
{input_generation_instructions}
#END_INPUT_GENERATION_INSTRUCTIONS

#EXPECTED_BEHAVIOR
{expected_behavior}
#END_EXPECTED_BEHAVIOR

#EXPECTED_OUTPUT
{expected_output}
#END_EXPECTED_OUTPUT

Based on the above information, provide a realistic input to the LLM agent. Your response should:
1. Match the expected input format according to the INPUT_SCHEMA exactly
2. Be consistent with the style and level of detail in the example inputs
3. Consider the context of the agent being tested
4. Be realistic and representative of what a real user might say or ask

OUTPUT: ONLY the simulated agent input in the exact format of the INPUT_SCHEMA in valid JSON. Do not include any explanations, quotation marks, or markdown."""


@traced(name="__mocker__")
async def generate_llm_input(
    evaluation_item: EvaluationItem,
    input_schema: Dict[str, Any],
) -> Dict[str, Any]:
    """Generate synthetic input using an LLM based on the evaluation context."""
    try:
        llm = UiPath().llm

        prompt = get_input_mocking_prompt(
            input_schema=json.dumps(input_schema, indent=2),
            input_generation_instructions=evaluation_item.input_mocking_strategy.prompt
            if evaluation_item.input_mocking_strategy
            else "",
            expected_behavior=evaluation_item.expected_agent_behavior or "",
            expected_output=json.dumps(evaluation_item.expected_output, indent=2)
            if evaluation_item.expected_output
            else "",
        )

        # Constrain the model to return JSON that validates against the
        # agent's input schema.
        response_format = {
            "type": "json_schema",
            "json_schema": {
                "name": "agent_input",
                "strict": True,
                "schema": input_schema,
            },
        }

        # Per-evaluation model overrides (model name, temperature, max tokens).
        model_parameters = (
            evaluation_item.input_mocking_strategy.model
            if evaluation_item.input_mocking_strategy
            else None
        )
        completion_kwargs = (
            model_parameters.model_dump(by_alias=False, exclude_none=True)
            if model_parameters
            else {}
        )

        response = await llm.chat_completions(
            [{"role": "user", "content": prompt}],
            response_format=response_format,
            **completion_kwargs,
        )

        generated_input_str = response.choices[0].message.content

        return json.loads(generated_input_str)
    except json.JSONDecodeError as e:
        raise UiPathInputMockingError(
            f"Failed to parse LLM response as JSON: {str(e)}"
        ) from e
    except UiPathInputMockingError:
        raise
    except Exception as e:
        raise UiPathInputMockingError(f"Failed to generate input: {str(e)}") from e
```
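For orientation, here is a minimal sketch of how a caller might drive `generate_llm_input`. The `EvaluationItem` payload and the required environment variables are taken from the test below; the concrete values are illustrative, not part of this PR:

```python
import asyncio

from uipath._cli._evals._models._evaluation_set import EvaluationItem
from uipath._cli._evals.mocks.input_mocker import generate_llm_input

# Hypothetical evaluation item; the camelCase keys mirror the test fixture below.
eval_item = EvaluationItem(
    **{
        "id": "demo-eval-id",
        "name": "Demo Input Generation",
        "inputs": {},
        "expectedOutput": {"result": 35},
        "expectedAgentBehavior": "Agent should multiply the numbers",
        "inputMockingStrategy": {
            "prompt": "Generate a multiplication query with 5 and 7",
        },
        "evalSetId": "demo-eval-set-id",
        "createdAt": "2025-09-04T18:54:58.378Z",
        "updatedAt": "2025-09-04T18:55:55.416Z",
    }
)

# JSON schema the mocked input must satisfy.
input_schema = {
    "type": "object",
    "properties": {"query": {"type": "string"}},
    "required": ["query"],
    "additionalProperties": False,
}

# Requires UIPATH_URL / UIPATH_ACCESS_TOKEN to be configured, as in the test.
mock_input = asyncio.run(generate_llm_input(eval_item, input_schema))
print(mock_input)  # e.g. {"query": "Calculate 5 times 7"}
```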
The accompanying test (+106 lines) exercises the model-settings override path end to end, mocking the UiPath LLM gateway with `pytest_httpx`:
```python
from typing import Any

import pytest
from _pytest.monkeypatch import MonkeyPatch
from pytest_httpx import HTTPXMock

from uipath._cli._evals._models._evaluation_set import (
    EvaluationItem,
    InputMockingStrategy,
    ModelSettings,
)
from uipath._cli._evals.mocks.input_mocker import generate_llm_input


@pytest.mark.asyncio
@pytest.mark.httpx_mock(assert_all_responses_were_requested=False)
async def test_generate_llm_input_with_model_settings(
    httpx_mock: HTTPXMock, monkeypatch: MonkeyPatch
):
    monkeypatch.setenv("UIPATH_URL", "https://example.com")
    monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "test-token")

    evaluation_item: dict[str, Any] = {
        "id": "test-eval-id",
        "name": "Test Input Generation",
        "inputs": {},
        "expectedOutput": {"result": 35},
        "expectedAgentBehavior": "Agent should multiply the numbers",
        "inputMockingStrategy": {
            "prompt": "Generate a multiplication query with 5 and 7",
            "model": {
                "model": "gpt-4o-mini-2024-07-18",
                "temperature": 0.5,
                "maxTokens": 150,
            },
        },
        "evalSetId": "test-eval-set-id",
        "createdAt": "2025-09-04T18:54:58.378Z",
        "updatedAt": "2025-09-04T18:55:55.416Z",
    }
    eval_item = EvaluationItem(**evaluation_item)

    # The camelCase JSON payload is validated into snake_case model fields.
    assert isinstance(eval_item.input_mocking_strategy, InputMockingStrategy)
    assert isinstance(eval_item.input_mocking_strategy.model, ModelSettings)
    assert eval_item.input_mocking_strategy.model.model == "gpt-4o-mini-2024-07-18"
    assert eval_item.input_mocking_strategy.model.temperature == 0.5
    assert eval_item.input_mocking_strategy.model.max_tokens == 150

    input_schema = {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
        "required": ["query"],
        "additionalProperties": False,
    }

    # Capability probes issued by the UiPath client before the completion call.
    httpx_mock.add_response(
        url="https://example.com/agenthub_/llm/api/capabilities",
        status_code=200,
        json={},
    )
    httpx_mock.add_response(
        url="https://example.com/orchestrator_/llm/api/capabilities",
        status_code=200,
        json={},
    )

    httpx_mock.add_response(
        url="https://example.com/api/chat/completions?api-version=2024-08-01-preview",
        status_code=200,
        json={
            "role": "assistant",
            "id": "response-id",
            "object": "chat.completion",
            "created": 0,
            "model": "gpt-4o-mini-2024-07-18",
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": '{"query": "Calculate 5 times 7"}',
                        "tool_calls": None,
                    },
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": 100,
                "completion_tokens": 20,
                "total_tokens": 120,
            },
        },
    )

    result = await generate_llm_input(eval_item, input_schema)

    # Verify the mocked input is correct
    assert result == {"query": "Calculate 5 times 7"}

    requests = httpx_mock.get_requests()
    chat_completion_requests = [r for r in requests if "chat/completions" in str(r.url)]
    assert len(chat_completion_requests) == 1, (
        "Expected exactly one chat completion request"
    )
```