diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py index fadc38929b..bb181103da 100644 --- a/skyvern/exceptions.py +++ b/skyvern/exceptions.py @@ -293,6 +293,11 @@ def __init__(self, step_id: str, reason: str) -> None: super().__init__(f"Step {step_id} cannot be executed and task execution is stopped. Reason: {reason}") +class SVGConversionFailed(SkyvernException): + def __init__(self, svg_html: str) -> None: + super().__init__(f"Failed to convert SVG after max retries. svg_html={svg_html}") + + class UnsupportedActionType(SkyvernException): def __init__(self, action_type: str): super().__init__(f"Unsupport action type: {action_type}") diff --git a/skyvern/forge/agent_functions.py b/skyvern/forge/agent_functions.py index 152bd8d490..c4874b237e 100644 --- a/skyvern/forge/agent_functions.py +++ b/skyvern/forge/agent_functions.py @@ -1,3 +1,4 @@ +import asyncio import copy import hashlib from typing import Awaitable, Callable, Dict, List @@ -6,7 +7,7 @@ from playwright.async_api import Page from skyvern.constants import SKYVERN_ID_ATTR -from skyvern.exceptions import StepUnableToExecuteError +from skyvern.exceptions import StepUnableToExecuteError, SVGConversionFailed from skyvern.forge import app from skyvern.forge.async_operations import AsyncOperation from skyvern.forge.prompts import prompt_engine @@ -19,6 +20,9 @@ LOG = structlog.get_logger() +USELESS_SVG_ATTRIBUTE = [SKYVERN_ID_ATTR, "id", "aria-describedby"] +SVG_RETRY_ATTEMPT = 3 + def _remove_rect(element: dict) -> None: if "rect" in element: @@ -38,8 +42,11 @@ def _remove_skyvern_attributes(element: Dict) -> Dict: if element_copied.get(attr): del element_copied[attr] - if element_copied.get("attributes") and SKYVERN_ID_ATTR in element_copied.get("attributes", {}): - del element_copied["attributes"][SKYVERN_ID_ATTR] + if "attributes" in element_copied: + attributes: dict = copy.deepcopy(element_copied.get("attributes", {})) + for key in attributes.keys(): + if key in USELESS_SVG_ATTRIBUTE: + del element_copied["attributes"][key] children: List[Dict] | None = element_copied.get("children", None) if children is None: @@ -80,20 +87,25 @@ async def _convert_svg_to_string(task: Task, step: Step, organization: Organizat else: LOG.debug("call LLM to convert SVG to string shape", element_id=element_id) svg_convert_prompt = prompt_engine.load_prompt("svg-convert", svg_element=svg_html) - try: - json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=svg_convert_prompt, step=step) - svg_shape = json_response.get("shape", "") - if not svg_shape: - raise Exception("Empty SVG shape replied by secondary llm") - LOG.info("SVG converted by LLM", element_id=element_id, shape=svg_shape) - await app.CACHE.set(svg_key, svg_shape) - except Exception: - LOG.exception( - "Failed to convert SVG to string shape by secondary llm", - element=element, - svg_html=svg_html, - ) - return + + for retry in range(SVG_RETRY_ATTEMPT): + try: + json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=svg_convert_prompt, step=step) + svg_shape = json_response.get("shape", "") + if not svg_shape: + raise Exception("Empty SVG shape replied by secondary llm") + LOG.info("SVG converted by LLM", element_id=element_id, shape=svg_shape) + await app.CACHE.set(svg_key, svg_shape) + break + except Exception: + LOG.exception( + "Failed to convert SVG to string shape by secondary llm. Will retry if haven't met the max try attempt after 3s.", + element_id=element_id, + retry=retry, + ) + await asyncio.sleep(3) + else: + raise SVGConversionFailed(svg_html=svg_html) element["attributes"] = dict() element["attributes"]["alt"] = svg_shape