Skip to content

Commit

Permalink
Compatible local model
Browse files Browse the repository at this point in the history
  • Loading branch information
wangkun committed Dec 18, 2024
1 parent 58413db commit df672cf
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 15 deletions.
33 changes: 24 additions & 9 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ services:
image: postgres:14-alpine
restart: always
# comment out if you want to externally connect DB
# ports:
# - 5432:5432
ports:
- 5432:5432
volumes:
- ./postgres-data:/var/lib/postgresql/data
- ../postgres-data:/var/lib/postgresql/data
environment:
- PGDATA=/var/lib/postgresql/data/pgdata
- POSTGRES_USER=skyvern
Expand All @@ -19,7 +19,7 @@ services:
retries: 5

skyvern:
image: public.ecr.aws/skyvern/skyvern:latest
image: swr.cn-south-1.myhuaweicloud.com/coastal/skyvern:0.1.0.1
restart: on-failure
# comment out if you want to externally call skyvern API
ports:
Expand All @@ -30,21 +30,34 @@ services:
- ./har:/data/har
- ./log:/data/log
- ./.streamlit:/app/.streamlit
- /etc/timezone:/etc/timezone
- /etc/localtime:/etc/localtime
# - ./skyvern:/app/skyvern
environment:
- DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern
- BROWSER_TYPE=chromium-headful
- ENABLE_OPENAI=true
- OPENAI_API_KEY=<your_openai_key>
- LITELLM_LOG=DEBUG
# - ENABLE_OPENAI=true
# - LLM_KEY=OPENAI_GPT4O_MINI
# - OPENAI_API_KEY=<your_openai_api_key>
# - OPENAI_API_BASE=https://free.v36.cm
# - LLM_KEY=OPENAI_GPT4O
# - OPENAI_API_KEY=<your_openai_api_key>
# - OPENAI_API_BASE=https://models.inference.ai.azure.com
- ENABLE_MISTRAL=true
- LLM_KEY=MISTRAL_7B_Q4
- OPENAI_API_BASE=http://localhost:1337/api/v1
- OPENAI_API_KEY=<your_openai_api_key>
# If you want to use other LLM provider, like azure and anthropic:
# - ENABLE_ANTHROPIC=true
# - LLM_KEY=ANTHROPIC_CLAUDE3_OPUS
# - ANTHROPIC_API_KEY=<your_anthropic_key>
# - ENABLE_AZURE=true
# - LLM_KEY=AZURE_OPENAI
# - AZURE_DEPLOYMENT=<your_azure_deployment>
# - AZURE_DEPLOYMENT=gpt-4o
# - AZURE_API_KEY=<your_azure_api_key>
# - AZURE_API_BASE=<your_azure_api_base>
# - AZURE_API_VERSION=<your_azure_api_version>
# - AZURE_API_BASE=https://models.inference.ai.azure.com
# - AZURE_API_VERSION=latest
depends_on:
postgres:
condition: service_healthy
Expand All @@ -65,6 +78,8 @@ services:
- ./videos:/data/videos
- ./har:/data/har
- ./.streamlit:/app/.streamlit
- /etc/timezone:/etc/timezone
- /etc/localtime:/etc/localtime
environment:
# if you want to run skyvern on a remote server,
# you need to change the host in VITE_WSS_BASE_URL and VITE_API_BASE_URL to match your server ip
Expand Down
2 changes: 1 addition & 1 deletion setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ main() {
choose_python_version_or_fail
remove_poetry_env
install_dependencies
setup_postgresql
# setup_postgresql
activate_poetry_env
install_dependencies_after_poetry_env
run_alembic_upgrade
Expand Down
2 changes: 2 additions & 0 deletions skyvern/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,15 @@ class Settings(BaseSettings):
LLM_CONFIG_TEMPERATURE: float = 0
# LLM PROVIDER SPECIFIC
ENABLE_OPENAI: bool = False
ENABLE_MISTRAL: bool = False
ENABLE_ANTHROPIC: bool = False
ENABLE_AZURE: bool = False
ENABLE_AZURE_GPT4O_MINI: bool = False
ENABLE_BEDROCK: bool = False
ENABLE_GEMINI: bool = False
# OPENAI
OPENAI_API_KEY: str | None = None
OPENAI_API_BASE: str | None = None
# ANTHROPIC
ANTHROPIC_API_KEY: str | None = None
# AZURE
Expand Down
4 changes: 2 additions & 2 deletions skyvern/forge/agent_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
LOG = structlog.get_logger()

USELESS_SHAPE_ATTRIBUTE = [SKYVERN_ID_ATTR, "id", "aria-describedby"]
SVG_SHAPE_CONVERTION_ATTEMPTS = 3
SVG_SHAPE_CONVERTION_ATTEMPTS = 1
CSS_SHAPE_CONVERTION_ATTEMPTS = 1
INVALID_SHAPE = "N/A"

Expand Down Expand Up @@ -151,7 +151,7 @@ async def _convert_svg_to_string(
element["isDropped"] = True
return

LOG.debug("call LLM to convert SVG to string shape", element_id=element_id)
LOG.info("call LLM to convert SVG to string shape", element_id=element_id, svg_element=svg_html)
svg_convert_prompt = prompt_engine.load_prompt("svg-convert", svg_element=svg_html)

for retry in range(SVG_SHAPE_CONVERTION_ATTEMPTS):
Expand Down
2 changes: 1 addition & 1 deletion skyvern/forge/prompts/skyvern/extract-action.j2
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Each interactable element is tagged with an ID. Avoid taking action on a disable
If you see any information in red in the page screenshot, this means a condition wasn't satisfied. prioritize actions with the red information.
If you see a popup in the page screenshot, prioritize actions on the popup.

Reply in JSON format with the following keys:
Don't give me any code! Reply in JSON format with the following keys:
{
"user_goal_stage": str, // A string to describe the reasoning whether user goal has been achieved or not.
"user_goal_achieved": bool, // True if the user goal has been completed, otherwise False.
Expand Down
2 changes: 1 addition & 1 deletion skyvern/forge/prompts/skyvern/svg-convert.j2
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ SVG Element:
{{svg_element}}
```

MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
MAKE SURE YOUR OUTPUT IS ONLY VALID JSON. Don't give me any code! No text before or after the JSON, no explanation, no trailing commas, no comments (//), no unnecessary quotes, etc.
Reply in JSON format with the following keys:
{
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
Expand Down
39 changes: 38 additions & 1 deletion skyvern/forge/sdk/api/llm/config_registry.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import structlog
import litellm

from skyvern.config import settings
from skyvern.forge.sdk.api.llm.exceptions import (
Expand Down Expand Up @@ -47,6 +48,7 @@ def get_config(cls, llm_key: str) -> LLMRouterConfig | LLMConfig:
if not any(
[
settings.ENABLE_OPENAI,
settings.ENABLE_MISTRAL,
settings.ENABLE_ANTHROPIC,
settings.ENABLE_AZURE,
settings.ENABLE_AZURE_GPT4O_MINI,
Expand Down Expand Up @@ -79,14 +81,27 @@ def get_config(cls, llm_key: str) -> LLMRouterConfig | LLMConfig:
LLMConfigRegistry.register_config(
"OPENAI_GPT4O",
LLMConfig(
"gpt-4o", ["OPENAI_API_KEY"], supports_vision=True, add_assistant_prefix=False, max_output_tokens=16384
"gpt-4o", ["OPENAI_API_KEY"],

litellm_params=LiteLLMParams(
api_base=settings.OPENAI_API_BASE,
api_key=settings.OPENAI_API_KEY,
model_info={"model_name": "gpt-4o"},
),
supports_vision=True, add_assistant_prefix=False, max_output_tokens=16384
),
)
LLMConfigRegistry.register_config(
"OPENAI_GPT4O_MINI",
LLMConfig(
"gpt-4o-mini",
["OPENAI_API_KEY"],

litellm_params=LiteLLMParams(
api_base=settings.OPENAI_API_BASE,
api_key=settings.OPENAI_API_KEY,
model_info={"model_name": "gpt-4o-mini"},
),
supports_vision=True,
add_assistant_prefix=False,
max_output_tokens=16384,
Expand All @@ -103,6 +118,28 @@ def get_config(cls, llm_key: str) -> LLMRouterConfig | LLMConfig:
),
)

if settings.ENABLE_MISTRAL:
LLMConfigRegistry.register_config(
"MISTRAL_7B_Q4",
LLMConfig(
"openai/mistral-ins-7b-q4", ["OPENAI_API_BASE", "OPENAI_API_KEY"],
litellm_params=LiteLLMParams(
api_base=settings.OPENAI_API_BASE,
api_key=settings.OPENAI_API_KEY,
model_info={"model_name": "openai/mistral-ins-7b-q4"},
),
supports_vision=True, add_assistant_prefix=False, max_output_tokens=16384
),
)
litellm.register_model({
"_": {
"max_tokens": 16384,
"input_cost_per_token": 0.00002,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
},
})

if settings.ENABLE_ANTHROPIC:
LLMConfigRegistry.register_config(
Expand Down
3 changes: 3 additions & 0 deletions skyvern/forge/sdk/api/llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def parse_api_response(response: litellm.ModelResponse, add_assistant_prefix: bo
content = None
try:
content = response.choices[0].message.content
LOG.info("LLM response", content=content)
if content.endswith("</s>"):
content = content[:-len("</s>")]
# Since we prefilled Anthropic response with "{" we need to add it back to the response to have a valid json object:
if add_assistant_prefix:
content = "{" + content
Expand Down

0 comments on commit df672cf

Please sign in to comment.