Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix search on auto completion #1544

Merged
merged 1 commit into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
There is an input element on an HTML page. Based on the context and information provided, you have two goals:
There is an input element on an HTML page. Based on the context and information provided, you have {{ "three" if is_search else "two" }} goals:
- Confirm if an auto-completion attempt appears after the user inputs the current value.
- If auto-completion suggestions appear, assist the user in selecting the most appropriate element based on the user's goal, details, and the context.
- If auto-completion suggestions appear, assist the user in selecting the most appropriate element based on the user's goal, details, and the context.{% if is_search %}
- Confirm if direct searching is a better way compared to all suggestions based on user's goal.{% endif %}

You can confirm an auto-completion attempt based on the following rules:
- Several auto-completion suggestions appear for the input value.
Expand All @@ -15,6 +16,8 @@ Each interactable element is tagged with an ID.

Reply in JSON format with the following keys:
{
"thought": str, // Think step by step. Describe your thought about how you achieve the {{ "three" if is_search else "two" }} goals with convincing evidence.{% if is_search %}
"direct_searching": bool, // True if direct searching is a better way compared to all suggestions, otherwise False.{% endif %}
"auto_completion_attempt": bool, // True if there's any auto completion attempt based on the rules. Otherwise, it should be False.
"reasoning": str, // The reasoning behind the decision. Be specific, referencing the value and the element id in your reasoning. Mention why you chose the element id. Keep the reasoning short and to the point.
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence.
Expand All @@ -25,7 +28,7 @@ Reply in JSON format with the following keys:

Context:
```
Choose an auto-completion suggestion for "{{ field_information }}"
Choose an auto-completion suggestion for "{{ field_information }}"{%if is_search %} or directly search with the input value{% endif %}
```

Input value:
Expand Down
24 changes: 22 additions & 2 deletions skyvern/webeye/actions/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,12 +788,21 @@ async def handle_input_text_action(
await incremental_scraped.stop_listen_dom_increment()

return [ActionSuccess()]
except Exception as e:
LOG.exception(
"Failed to input the value or finish the auto completion",
task_id=task.task_id,
step_id=step.step_id,
)
raise e
finally:
# HACK: force to finish missing auto completion input
if auto_complete_hacky_flag and not await skyvern_element.is_raw_input():
if auto_complete_hacky_flag and await skyvern_element.is_visible() and not await skyvern_element.is_raw_input():
LOG.debug(
"Trigger input-selection hack, pressing Tab to choose one",
action=action,
task_id=task.task_id,
step_id=step.step_id,
)
await skyvern_element.press_key("Tab")

Expand Down Expand Up @@ -1624,6 +1633,7 @@ async def choose_auto_completion_dropdown(
html = incremental_scraped.build_html_tree(cleaned_incremental_element)
auto_completion_confirm_prompt = prompt_engine.load_prompt(
"auto-completion-choose-option",
is_search=context.is_search_bar,
field_information=context.field,
filled_value=text,
navigation_goal=task.navigation_goal,
Expand All @@ -1638,6 +1648,16 @@ async def choose_auto_completion_dropdown(
json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=auto_completion_confirm_prompt, step=step)
element_id = json_response.get("id", "")
relevance_float = json_response.get("relevance_float", 0)
if json_response.get("direct_searching", False):
LOG.info(
"Decided to directly search with the current value",
value=text,
step_id=step.step_id,
task_id=task.task_id,
)
await skyvern_element.press_key("Enter")
return result

if not element_id:
reasoning = json_response.get("reasoning")
raise NoSuitableAutoCompleteOption(reasoning=reasoning, target_value=text)
Expand Down Expand Up @@ -1682,7 +1702,7 @@ async def choose_auto_completion_dropdown(
return result
finally:
await incremental_scraped.stop_listen_dom_increment()
if clear_input:
if clear_input and await skyvern_element.is_visible():
await skyvern_element.input_clear()


Expand Down
23 changes: 17 additions & 6 deletions skyvern/webeye/scraper/domUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -2132,15 +2132,26 @@ if (window.globalObserverForDOMIncrement === undefined) {
}

if (mutation.type === "childList") {
if (mutation.target.nodeType === Node.TEXT_NODE) continue;
const node = mutation.target;
let changedNode = {
targetNode: mutation.target, // TODO: for future usage, when we want to parse new elements into a tree
targetNode: node, // TODO: for future usage, when we want to parse new elements into a tree
};
let newNodes = [];
if (mutation.addedNodes && mutation.addedNodes.length > 0) {
for (const node of mutation.addedNodes) {
// skip the text nodes, they won't be interactable
if (node.nodeType === Node.TEXT_NODE) continue;
newNodes.push(node);
if (
node.tagName.toLowerCase() === "ul" ||
(node.tagName.toLowerCase() === "div" &&
node.hasAttribute("role") &&
node.getAttribute("role").toLowerCase() === "listbox")
) {
newNodes.push(node);
} else {
if (mutation.addedNodes && mutation.addedNodes.length > 0) {
for (const node of mutation.addedNodes) {
// skip the text nodes, they won't be interactable
if (node.nodeType === Node.TEXT_NODE) continue;
newNodes.push(node);
}
}
}
if (newNodes.length > 0) {
Expand Down
4 changes: 4 additions & 0 deletions skyvern/webeye/scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,10 @@ async def start_listen_dom_increment(self) -> None:
await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script)

async def stop_listen_dom_increment(self) -> None:
    """Stop the in-page incremental DOM observer if it is still installed.

    The frame is first asked whether ``window.globalObserverForDOMIncrement``
    is undefined. When it is, ``stopGlobalIncrementalObserver()`` is not
    invoked and the method simply returns.
    """
    # An undefined observer global is taken to mean the DOM has navigated
    # away or refreshed, so there is nothing left to stop.
    observer_missing_probe = "() => window.globalObserverForDOMIncrement === undefined"
    observer_missing = await SkyvernFrame.evaluate(
        frame=self.skyvern_frame.get_frame(),
        expression=observer_missing_probe,
    )
    if not observer_missing:
        stop_observer_call = "() => stopGlobalIncrementalObserver()"
        await SkyvernFrame.evaluate(
            frame=self.skyvern_frame.get_frame(),
            expression=stop_observer_call,
        )

Expand Down
2 changes: 2 additions & 0 deletions skyvern/webeye/utils/dom.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ async def is_selectable(self) -> bool:
return self.get_selectable() or self.get_tag_name() in SELECTABLE_ELEMENT

async def is_visible(self) -> bool:
    """Report whether this element is currently visible in its frame.

    Returns:
        ``False`` when the locator's ``count()`` is falsy (presumably no
        matching element is left in the DOM -- confirm against the locator
        API); otherwise the result of the frame's ``get_element_visible``
        check on this element's handler.
    """
    matched = await self.get_locator().count()
    if not matched:
        # Short-circuit before a frame instance or element handler is requested.
        return False

    frame_wrapper = await SkyvernFrame.create_instance(self.get_frame())
    element_handle = await self.get_element_handler()
    return await frame_wrapper.get_element_visible(element_handle)

Expand Down
Loading