From a7904a2cc776d67a3ccc6120343211ac59947196 Mon Sep 17 00:00:00 2001 From: lawyzheng Date: Tue, 26 Nov 2024 12:24:13 +0800 Subject: [PATCH 1/2] use skyvern temp to save tempfiles --- skyvern/config.py | 4 +-- skyvern/forge/sdk/api/files.py | 33 +++++++++++++++++++-- skyvern/forge/sdk/artifact/storage/local.py | 5 ++-- skyvern/forge/sdk/artifact/storage/s3.py | 16 ++++++---- skyvern/forge/sdk/workflow/models/block.py | 4 +-- skyvern/webeye/browser_factory.py | 4 +-- 6 files changed, 49 insertions(+), 17 deletions(-) diff --git a/skyvern/config.py b/skyvern/config.py index 72ade14523..bce59736f1 100644 --- a/skyvern/config.py +++ b/skyvern/config.py @@ -14,6 +14,7 @@ class Settings(BaseSettings): VIDEO_PATH: str | None = None HAR_PATH: str | None = "./har" LOG_PATH: str = "./log" + TEMP_PATH: str = "./temp" BROWSER_ACTION_TIMEOUT_MS: int = 5000 BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000 BROWSER_LOADING_TIMEOUT_MS: int = 120000 @@ -74,9 +75,6 @@ class Settings(BaseSettings): WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY: str = "SKYVERN_DOWNLOAD_DIRECTORY" WORKFLOW_WAIT_BLOCK_MAX_SEC: int = 30 * 60 - # streaming settings - STREAMING_FILE_BASE_PATH: str = "/tmp" - # Saved browser session settings BROWSER_SESSION_BASE_PATH: str = f"{constants.REPO_ROOT_DIR}/browser_sessions" diff --git a/skyvern/forge/sdk/api/files.py b/skyvern/forge/sdk/api/files.py index 0434841f1a..8fb378bb1f 100644 --- a/skyvern/forge/sdk/api/files.py +++ b/skyvern/forge/sdk/api/files.py @@ -14,13 +14,14 @@ from skyvern.constants import REPO_ROOT_DIR from skyvern.exceptions import DownloadFileMaxSizeExceeded from skyvern.forge.sdk.api.aws import AsyncAWSClient +from skyvern.forge.sdk.settings_manager import SettingsManager LOG = structlog.get_logger() async def download_from_s3(client: AsyncAWSClient, s3_uri: str) -> str: downloaded_bytes = await client.download_file(uri=s3_uri) - file_path = tempfile.NamedTemporaryFile(delete=False) + file_path = create_named_temporary_file(delete=False) file_path.write(downloaded_bytes) return file_path.name @@ -56,7 +57,7 @@ async def download_file(url: str, max_size_mb: int | None = None) -> str: a = urlparse(url) # Get the file name - temp_dir = tempfile.mkdtemp(prefix="skyvern_downloads_") + temp_dir = make_temp_directory(prefix="skyvern_downloads_") file_name = os.path.basename(a.path) # if no suffix in the URL, we need to parse it from HTTP headers @@ -151,3 +152,31 @@ def calculate_sha256_for_file(file_path: str) -> str: for byte_block in iter(lambda: f.read(4096), b""): sha256_hash.update(byte_block) return sha256_hash.hexdigest() + + +def create_folder_if_not_exist(dir: str) -> None: + path = Path(dir) + if path.exists(): + return + path.mkdir(parents=True) + + +def get_skyvern_temp_dir() -> str: + temp_dir = SettingsManager.get_settings().TEMP_PATH + create_folder_if_not_exist(temp_dir) + return temp_dir + + +def make_temp_directory( + suffix: str | None = None, + prefix: str | None = None, +) -> str: + temp_dir = SettingsManager.get_settings().TEMP_PATH + create_folder_if_not_exist(temp_dir) + return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=temp_dir) + + +def create_named_temporary_file(delete: bool = True) -> tempfile._TemporaryFileWrapper: + temp_dir = SettingsManager.get_settings().TEMP_PATH + create_folder_if_not_exist(temp_dir) + return tempfile.NamedTemporaryFile(dir=temp_dir, delete=delete) diff --git a/skyvern/forge/sdk/artifact/storage/local.py b/skyvern/forge/sdk/artifact/storage/local.py index 7df2c54f1a..54f0c54ed1 100644 --- a/skyvern/forge/sdk/artifact/storage/local.py +++ b/skyvern/forge/sdk/artifact/storage/local.py @@ -6,6 +6,7 @@ import structlog +from skyvern.forge.sdk.api.files import get_skyvern_temp_dir from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage from skyvern.forge.sdk.models import Step @@ -73,9 +74,9 @@ async def save_streaming_file(self, organization_id: str, file_name: str) -> Non return async def get_streaming_file(self, organization_id: str, file_name: str, use_default: bool = True) -> bytes | None: - file_path = Path(f"{SettingsManager.get_settings().STREAMING_FILE_BASE_PATH}/skyvern_screenshot.png") + file_path = Path(f"{get_skyvern_temp_dir()}/skyvern_screenshot.png") if not use_default: - file_path = Path(f"{SettingsManager.get_settings().STREAMING_FILE_BASE_PATH}/{organization_id}/{file_name}") + file_path = Path(f"{get_skyvern_temp_dir()}/{organization_id}/{file_name}") try: with open(file_path, "rb") as f: return f.read() diff --git a/skyvern/forge/sdk/artifact/storage/s3.py b/skyvern/forge/sdk/artifact/storage/s3.py index b7a1726ee4..c3b7601c20 100644 --- a/skyvern/forge/sdk/artifact/storage/s3.py +++ b/skyvern/forge/sdk/artifact/storage/s3.py @@ -1,10 +1,14 @@ import shutil -import tempfile from datetime import datetime from skyvern.config import settings from skyvern.forge.sdk.api.aws import AsyncAWSClient -from skyvern.forge.sdk.api.files import unzip_files +from skyvern.forge.sdk.api.files import ( + create_named_temporary_file, + get_skyvern_temp_dir, + make_temp_directory, + unzip_files, +) from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage from skyvern.forge.sdk.models import Step @@ -36,7 +40,7 @@ async def store_artifact_from_path(self, artifact: Artifact, path: str) -> None: await self.async_client.upload_file_from_path(artifact.uri, path) async def save_streaming_file(self, organization_id: str, file_name: str) -> None: - from_path = f"{settings.STREAMING_FILE_BASE_PATH}/{organization_id}/{file_name}" + from_path = f"{get_skyvern_temp_dir()}/{organization_id}/{file_name}" to_path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}" await self.async_client.upload_file_from_path(to_path, from_path) @@ -46,7 +50,7 @@ async def get_streaming_file(self, organization_id: str, file_name: str, use_def async def store_browser_session(self, organization_id: str, workflow_permanent_id: str, directory: str) -> None: # Zip the directory to a temp file - temp_zip_file = tempfile.NamedTemporaryFile() + temp_zip_file = create_named_temporary_file() zip_file_path = shutil.make_archive(temp_zip_file.name, "zip", directory) browser_session_uri = f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/{workflow_permanent_id}.zip" await self.async_client.upload_file_from_path(browser_session_uri, zip_file_path) @@ -56,11 +60,11 @@ async def retrieve_browser_session(self, organization_id: str, workflow_permanen downloaded_zip_bytes = await self.async_client.download_file(browser_session_uri, log_exception=True) if not downloaded_zip_bytes: return None - temp_zip_file = tempfile.NamedTemporaryFile(delete=False) + temp_zip_file = create_named_temporary_file(delete=False) temp_zip_file.write(downloaded_zip_bytes) temp_zip_file_path = temp_zip_file.name - temp_dir = tempfile.mkdtemp(prefix="skyvern_browser_session_") + temp_dir = make_temp_directory(prefix="skyvern_browser_session_") unzip_files(temp_zip_file_path, temp_dir) temp_zip_file.close() return temp_dir diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index 07dab21758..4386758d92 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -11,7 +11,6 @@ from email.message import EmailMessage from enum import StrEnum from pathlib import Path -from tempfile import NamedTemporaryFile from typing import Annotated, Any, Literal, Union import filetype @@ -36,6 +35,7 @@ from skyvern.forge.sdk.api.aws import AsyncAWSClient from skyvern.forge.sdk.api.files import ( calculate_sha256_for_file, + create_named_temporary_file, download_file, download_from_s3, get_path_for_workflow_download_directory, @@ -1056,7 +1056,7 @@ def _get_file_paths(self, workflow_run_context: WorkflowRunContext, workflow_run async def _download_from_s3(self, s3_uri: str) -> str: client = self.get_async_aws_client() downloaded_bytes = await client.download_file(uri=s3_uri) - file_path = NamedTemporaryFile(delete=False) + file_path = create_named_temporary_file(delete=False) file_path.write(downloaded_bytes) return file_path.name diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index 25b99fc60c..da58890573 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -2,7 +2,6 @@ import asyncio import os -import tempfile import time import uuid from datetime import datetime @@ -24,6 +23,7 @@ UnknownBrowserType, UnknownErrorWhileCreatingBrowserContext, ) +from skyvern.forge.sdk.api.files import make_temp_directory from skyvern.forge.sdk.core.skyvern_context import current from skyvern.forge.sdk.schemas.tasks import ProxyLocation from skyvern.forge.sdk.settings_manager import SettingsManager @@ -153,7 +153,7 @@ def build_browser_args() -> dict[str, Any]: video_dir = f"{SettingsManager.get_settings().VIDEO_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}" har_dir = f"{SettingsManager.get_settings().HAR_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}/{BrowserContextFactory.get_subdir()}.har" return { - "user_data_dir": tempfile.mkdtemp(prefix="skyvern_browser_"), + "user_data_dir": make_temp_directory(prefix="skyvern_browser_"), "locale": SettingsManager.get_settings().BROWSER_LOCALE, "timezone_id": SettingsManager.get_settings().BROWSER_TIMEZONE, "color_scheme": "no-preference", From 38d94ce6226e1c39a20c966f57af2a866d1ad3da Mon Sep 17 00:00:00 2001 From: lawyzheng Date: Tue, 26 Nov 2024 12:27:28 +0800 Subject: [PATCH 2/2] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 045020079e..a88e841b26 100644 --- a/.gitignore +++ b/.gitignore @@ -166,6 +166,7 @@ traces/ har/ postgres-data files/ +temp/ # Streamlit ignores **/secrets*.toml