Skip to content

Commit

Permalink
Compatible local model
Browse files Browse the repository at this point in the history
  • Loading branch information
wangkun committed Dec 18, 2024
1 parent 58413db commit df672cf
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 15 deletions.
33 changes: 24 additions & 9 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ services:
image: postgres:14-alpine
restart: always
# comment out if you want to externally connect DB
# ports:
# - 5432:5432
ports:
- 5432:5432
volumes:
- ./postgres-data:/var/lib/postgresql/data
- ../postgres-data:/var/lib/postgresql/data
environment:
- PGDATA=/var/lib/postgresql/data/pgdata
- POSTGRES_USER=skyvern
Expand All @@ -19,7 +19,7 @@ services:
retries: 5

skyvern:
image: public.ecr.aws/skyvern/skyvern:latest
image: swr.cn-south-1.myhuaweicloud.com/coastal/skyvern:0.1.0.1
restart: on-failure
# comment out if you want to externally call skyvern API
ports:
Expand All @@ -30,21 +30,34 @@ services:
- ./har:/data/har
- ./log:/data/log
- ./.streamlit:/app/.streamlit
- /etc/timezone:/etc/timezone
- /etc/localtime:/etc/localtime
# - ./skyvern:/app/skyvern
environment:
- DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern
- BROWSER_TYPE=chromium-headful
- ENABLE_OPENAI=true
- OPENAI_API_KEY=<your_openai_key>
- LITELLM_LOG=DEBUG
# - ENABLE_OPENAI=true
# - LLM_KEY=OPENAI_GPT4O_MINI
# - OPENAI_API_KEY=<your_openai_api_key>
# - OPENAI_API_BASE=https://free.v36.cm
# - LLM_KEY=OPENAI_GPT4O
# - OPENAI_API_KEY=<your_openai_api_key>
# - OPENAI_API_BASE=https://models.inference.ai.azure.com
- ENABLE_MISTRAL=true
- LLM_KEY=MISTRAL_7B_Q4
- OPENAI_API_BASE=http://localhost:1337/api/v1
- OPENAI_API_KEY=<your_openai_api_key>
# If you want to use other LLM provider, like azure and anthropic:
# - ENABLE_ANTHROPIC=true
# - LLM_KEY=ANTHROPIC_CLAUDE3_OPUS
# - ANTHROPIC_API_KEY=<your_anthropic_key>
# - ENABLE_AZURE=true
# - LLM_KEY=AZURE_OPENAI
# - AZURE_DEPLOYMENT=<your_azure_deployment>
# - AZURE_DEPLOYMENT=gpt-4o
# - AZURE_API_KEY=<your_azure_api_key>
# - AZURE_API_BASE=<your_azure_api_base>
# - AZURE_API_VERSION=<your_azure_api_version>
# - AZURE_API_BASE=https://models.inference.ai.azure.com
# - AZURE_API_VERSION=latest
depends_on:
postgres:
condition: service_healthy
Expand All @@ -65,6 +78,8 @@ services:
- ./videos:/data/videos
- ./har:/data/har
- ./.streamlit:/app/.streamlit
- /etc/timezone:/etc/timezone
- /etc/localtime:/etc/localtime
environment:
# if you want to run skyvern on a remote server,
# you need to change the host in VITE_WSS_BASE_URL and VITE_API_BASE_URL to match your server ip
Expand Down
2 changes: 1 addition & 1 deletion setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ main() {
choose_python_version_or_fail
remove_poetry_env
install_dependencies
setup_postgresql
# setup_postgresql
activate_poetry_env
install_dependencies_after_poetry_env
run_alembic_upgrade
Expand Down
2 changes: 2 additions & 0 deletions skyvern/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,15 @@ class Settings(BaseSettings):
LLM_CONFIG_TEMPERATURE: float = 0
# LLM PROVIDER SPECIFIC
ENABLE_OPENAI: bool = False
ENABLE_MISTRAL: bool = False
ENABLE_ANTHROPIC: bool = False
ENABLE_AZURE: bool = False
ENABLE_AZURE_GPT4O_MINI: bool = False
ENABLE_BEDROCK: bool = False
ENABLE_GEMINI: bool = False
# OPENAI
OPENAI_API_KEY: str | None = None
OPENAI_API_BASE: str | None = None
# ANTHROPIC
ANTHROPIC_API_KEY: str | None = None
# AZURE
Expand Down
4 changes: 2 additions & 2 deletions skyvern/forge/agent_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
LOG = structlog.get_logger()

USELESS_SHAPE_ATTRIBUTE = [SKYVERN_ID_ATTR, "id", "aria-describedby"]
SVG_SHAPE_CONVERTION_ATTEMPTS = 3
SVG_SHAPE_CONVERTION_ATTEMPTS = 1
CSS_SHAPE_CONVERTION_ATTEMPTS = 1
INVALID_SHAPE = "N/A"

Expand Down Expand Up @@ -151,7 +151,7 @@ async def _convert_svg_to_string(
element["isDropped"] = True
return

LOG.debug("call LLM to convert SVG to string shape", element_id=element_id)
LOG.info("call LLM to convert SVG to string shape", element_id=element_id, svg_element=svg_html)
svg_convert_prompt = prompt_engine.load_prompt("svg-convert", svg_element=svg_html)

for retry in range(SVG_SHAPE_CONVERTION_ATTEMPTS):
Expand Down
2 changes: 1 addition & 1 deletion skyvern/forge/prompts/skyvern/extract-action.j2
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Each interactable element is tagged with an ID. Avoid taking action on a disable
If you see any information in red in the page screenshot, this means a condition wasn't satisfied. prioritize actions with the red information.
If you see a popup in the page screenshot, prioritize actions on the popup.

Reply in JSON format with the following keys:
Don't give me any code! Reply in JSON format with the following keys:
{
"user_goal_stage": str, // A string to describe the reasoning whether user goal has been achieved or not.
"user_goal_achieved": bool, // True if the user goal has been completed, otherwise False.
Expand Down
2 changes: 1 addition & 1 deletion skyvern/forge/prompts/skyvern/svg-convert.j2
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ SVG Element:
{{svg_element}}
```

MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
MAKE SURE YOUR OUTPUT IS ONLY VALID JSON. Don't give me any code! No text before or after the JSON, no explanation, no trailing commas, no comments (//), no unnecessary quotes, etc.
Reply in JSON format with the following keys:
{
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
Expand Down
39 changes: 38 additions & 1 deletion skyvern/forge/sdk/api/llm/config_registry.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import structlog
import litellm

from skyvern.config import settings
from skyvern.forge.sdk.api.llm.exceptions import (
Expand Down Expand Up @@ -47,6 +48,7 @@ def get_config(cls, llm_key: str) -> LLMRouterConfig | LLMConfig:
if not any(
[
settings.ENABLE_OPENAI,
settings.ENABLE_MISTRAL,
settings.ENABLE_ANTHROPIC,
settings.ENABLE_AZURE,
settings.ENABLE_AZURE_GPT4O_MINI,
Expand Down Expand Up @@ -79,14 +81,27 @@ def get_config(cls, llm_key: str) -> LLMRouterConfig | LLMConfig:
LLMConfigRegistry.register_config(
"OPENAI_GPT4O",
LLMConfig(
"gpt-4o", ["OPENAI_API_KEY"], supports_vision=True, add_assistant_prefix=False, max_output_tokens=16384
"gpt-4o", ["OPENAI_API_KEY"],

litellm_params=LiteLLMParams(
api_base=settings.OPENAI_API_BASE,
api_key=settings.OPENAI_API_KEY,
model_info={"model_name": "gpt-4o"},
),
supports_vision=True, add_assistant_prefix=False, max_output_tokens=16384
),
)
LLMConfigRegistry.register_config(
"OPENAI_GPT4O_MINI",
LLMConfig(
"gpt-4o-mini",
["OPENAI_API_KEY"],

litellm_params=LiteLLMParams(
api_base=settings.OPENAI_API_BASE,
api_key=settings.OPENAI_API_KEY,
model_info={"model_name": "gpt-4o-mini"},
),
supports_vision=True,
add_assistant_prefix=False,
max_output_tokens=16384,
Expand All @@ -103,6 +118,28 @@ def get_config(cls, llm_key: str) -> LLMRouterConfig | LLMConfig:
),
)

if settings.ENABLE_MISTRAL:
LLMConfigRegistry.register_config(
"MISTRAL_7B_Q4",
LLMConfig(
"openai/mistral-ins-7b-q4", ["OPENAI_API_BASE", "OPENAI_API_KEY"],
litellm_params=LiteLLMParams(
api_base=settings.OPENAI_API_BASE,
api_key=settings.OPENAI_API_KEY,
model_info={"model_name": "openai/mistral-ins-7b-q4"},
),
supports_vision=True, add_assistant_prefix=False, max_output_tokens=16384
),
)
litellm.register_model({
"_": {
"max_tokens": 16384,
"input_cost_per_token": 0.00002,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
},
})

if settings.ENABLE_ANTHROPIC:
LLMConfigRegistry.register_config(
Expand Down
3 changes: 3 additions & 0 deletions skyvern/forge/sdk/api/llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def parse_api_response(response: litellm.ModelResponse, add_assistant_prefix: bo
content = None
try:
content = response.choices[0].message.content
LOG.info("LLM response", content=content)
if content.endswith("</s>"):
content = content[:-len("</s>")]
# Since we prefilled Anthropic response with "{" we need to add it back to the response to have a valid json object:
if add_assistant_prefix:
content = "{" + content
Expand Down

0 comments on commit df672cf

Please sign in to comment.