Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use skyvern temp to save tempfiles #1262

Merged
merged 2 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ traces/
har/
postgres-data
files/
temp/

# Streamlit ignores
**/secrets*.toml
Expand Down
4 changes: 1 addition & 3 deletions skyvern/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class Settings(BaseSettings):
VIDEO_PATH: str | None = None
HAR_PATH: str | None = "./har"
LOG_PATH: str = "./log"
TEMP_PATH: str = "./temp"
BROWSER_ACTION_TIMEOUT_MS: int = 5000
BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
BROWSER_LOADING_TIMEOUT_MS: int = 120000
Expand Down Expand Up @@ -74,9 +75,6 @@ class Settings(BaseSettings):
WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY: str = "SKYVERN_DOWNLOAD_DIRECTORY"
WORKFLOW_WAIT_BLOCK_MAX_SEC: int = 30 * 60

# streaming settings
STREAMING_FILE_BASE_PATH: str = "/tmp"

# Saved browser session settings
BROWSER_SESSION_BASE_PATH: str = f"{constants.REPO_ROOT_DIR}/browser_sessions"

Expand Down
33 changes: 31 additions & 2 deletions skyvern/forge/sdk/api/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
from skyvern.constants import REPO_ROOT_DIR
from skyvern.exceptions import DownloadFileMaxSizeExceeded
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.settings_manager import SettingsManager

LOG = structlog.get_logger()


async def download_from_s3(client: AsyncAWSClient, s3_uri: str) -> str:
downloaded_bytes = await client.download_file(uri=s3_uri)
file_path = tempfile.NamedTemporaryFile(delete=False)
file_path = create_named_temporary_file(delete=False)
file_path.write(downloaded_bytes)
return file_path.name

Expand Down Expand Up @@ -56,7 +57,7 @@ async def download_file(url: str, max_size_mb: int | None = None) -> str:
a = urlparse(url)

# Get the file name
temp_dir = tempfile.mkdtemp(prefix="skyvern_downloads_")
temp_dir = make_temp_directory(prefix="skyvern_downloads_")

file_name = os.path.basename(a.path)
# if no suffix in the URL, we need to parse it from HTTP headers
Expand Down Expand Up @@ -151,3 +152,31 @@ def calculate_sha256_for_file(file_path: str) -> str:
for byte_block in iter(lambda: f.read(4096), b""):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()


def create_folder_if_not_exist(dir: str) -> None:
    """Create the directory ``dir`` (and any missing parents) if it is absent.

    Args:
        dir: Path of the directory to create. The name shadows the builtin
            but is kept because it is part of the public signature.

    Returns:
        None. If something already exists at ``dir`` the call is a no-op.
    """
    path = Path(dir)
    if path.exists():
        return
    # exist_ok=True closes the check-then-create race: another worker may
    # create the directory between the exists() check above and this call,
    # which would otherwise surface as a spurious FileExistsError.
    path.mkdir(parents=True, exist_ok=True)


def get_skyvern_temp_dir() -> str:
    """Return the configured ``TEMP_PATH`` setting, creating the folder if needed."""
    skyvern_temp_path = SettingsManager.get_settings().TEMP_PATH
    # Make sure the folder is on disk before handing the path to callers.
    create_folder_if_not_exist(skyvern_temp_path)
    return skyvern_temp_path


def make_temp_directory(
    suffix: str | None = None,
    prefix: str | None = None,
) -> str:
    """Create a fresh, uniquely named directory under the Skyvern temp folder.

    Args:
        suffix: Optional suffix for the generated directory name, forwarded
            to ``tempfile.mkdtemp``.
        prefix: Optional prefix for the generated directory name, forwarded
            to ``tempfile.mkdtemp``.

    Returns:
        The path of the newly created directory, as returned by
        ``tempfile.mkdtemp``. Nothing here removes it afterwards.
    """
    # Reuse the shared helper so the settings lookup and folder creation
    # live in exactly one place.
    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=get_skyvern_temp_dir())


def create_named_temporary_file(delete: bool = True) -> tempfile._TemporaryFileWrapper:
    """Open a named temporary file inside the Skyvern temp folder.

    Args:
        delete: Forwarded to ``tempfile.NamedTemporaryFile``; when True the
            file is removed as soon as it is closed.

    Returns:
        The open file wrapper from ``tempfile.NamedTemporaryFile``; its
        ``name`` attribute holds the file's path.
    """
    # Reuse the shared helper so the settings lookup and folder creation
    # live in exactly one place.
    return tempfile.NamedTemporaryFile(dir=get_skyvern_temp_dir(), delete=delete)
5 changes: 3 additions & 2 deletions skyvern/forge/sdk/artifact/storage/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import structlog

from skyvern.forge.sdk.api.files import get_skyvern_temp_dir
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step
Expand Down Expand Up @@ -73,9 +74,9 @@ async def save_streaming_file(self, organization_id: str, file_name: str) -> Non
return

async def get_streaming_file(self, organization_id: str, file_name: str, use_default: bool = True) -> bytes | None:
file_path = Path(f"{SettingsManager.get_settings().STREAMING_FILE_BASE_PATH}/skyvern_screenshot.png")
file_path = Path(f"{get_skyvern_temp_dir()}/skyvern_screenshot.png")
if not use_default:
file_path = Path(f"{SettingsManager.get_settings().STREAMING_FILE_BASE_PATH}/{organization_id}/{file_name}")
file_path = Path(f"{get_skyvern_temp_dir()}/{organization_id}/{file_name}")
try:
with open(file_path, "rb") as f:
return f.read()
Expand Down
16 changes: 10 additions & 6 deletions skyvern/forge/sdk/artifact/storage/s3.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import shutil
import tempfile
from datetime import datetime

from skyvern.config import settings
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.api.files import unzip_files
from skyvern.forge.sdk.api.files import (
create_named_temporary_file,
get_skyvern_temp_dir,
make_temp_directory,
unzip_files,
)
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step
Expand Down Expand Up @@ -36,7 +40,7 @@ async def store_artifact_from_path(self, artifact: Artifact, path: str) -> None:
await self.async_client.upload_file_from_path(artifact.uri, path)

async def save_streaming_file(self, organization_id: str, file_name: str) -> None:
from_path = f"{settings.STREAMING_FILE_BASE_PATH}/{organization_id}/{file_name}"
from_path = f"{get_skyvern_temp_dir()}/{organization_id}/{file_name}"
to_path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}"
await self.async_client.upload_file_from_path(to_path, from_path)

Expand All @@ -46,7 +50,7 @@ async def get_streaming_file(self, organization_id: str, file_name: str, use_def

async def store_browser_session(self, organization_id: str, workflow_permanent_id: str, directory: str) -> None:
# Zip the directory to a temp file
temp_zip_file = tempfile.NamedTemporaryFile()
temp_zip_file = create_named_temporary_file()
zip_file_path = shutil.make_archive(temp_zip_file.name, "zip", directory)
browser_session_uri = f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/{workflow_permanent_id}.zip"
await self.async_client.upload_file_from_path(browser_session_uri, zip_file_path)
Expand All @@ -56,11 +60,11 @@ async def retrieve_browser_session(self, organization_id: str, workflow_permanen
downloaded_zip_bytes = await self.async_client.download_file(browser_session_uri, log_exception=True)
if not downloaded_zip_bytes:
return None
temp_zip_file = tempfile.NamedTemporaryFile(delete=False)
temp_zip_file = create_named_temporary_file(delete=False)
temp_zip_file.write(downloaded_zip_bytes)
temp_zip_file_path = temp_zip_file.name

temp_dir = tempfile.mkdtemp(prefix="skyvern_browser_session_")
temp_dir = make_temp_directory(prefix="skyvern_browser_session_")
unzip_files(temp_zip_file_path, temp_dir)
temp_zip_file.close()
return temp_dir
4 changes: 2 additions & 2 deletions skyvern/forge/sdk/workflow/models/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from email.message import EmailMessage
from enum import StrEnum
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Annotated, Any, Literal, Union

import filetype
Expand All @@ -36,6 +35,7 @@
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.api.files import (
calculate_sha256_for_file,
create_named_temporary_file,
download_file,
download_from_s3,
get_path_for_workflow_download_directory,
Expand Down Expand Up @@ -1056,7 +1056,7 @@ def _get_file_paths(self, workflow_run_context: WorkflowRunContext, workflow_run
async def _download_from_s3(self, s3_uri: str) -> str:
client = self.get_async_aws_client()
downloaded_bytes = await client.download_file(uri=s3_uri)
file_path = NamedTemporaryFile(delete=False)
file_path = create_named_temporary_file(delete=False)
file_path.write(downloaded_bytes)
return file_path.name

Expand Down
4 changes: 2 additions & 2 deletions skyvern/webeye/browser_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import asyncio
import os
import tempfile
import time
import uuid
from datetime import datetime
Expand All @@ -24,6 +23,7 @@
UnknownBrowserType,
UnknownErrorWhileCreatingBrowserContext,
)
from skyvern.forge.sdk.api.files import make_temp_directory
from skyvern.forge.sdk.core.skyvern_context import current
from skyvern.forge.sdk.schemas.tasks import ProxyLocation
from skyvern.forge.sdk.settings_manager import SettingsManager
Expand Down Expand Up @@ -153,7 +153,7 @@ def build_browser_args() -> dict[str, Any]:
video_dir = f"{SettingsManager.get_settings().VIDEO_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}"
har_dir = f"{SettingsManager.get_settings().HAR_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}/{BrowserContextFactory.get_subdir()}.har"
return {
"user_data_dir": tempfile.mkdtemp(prefix="skyvern_browser_"),
"user_data_dir": make_temp_directory(prefix="skyvern_browser_"),
"locale": SettingsManager.get_settings().BROWSER_LOCALE,
"timezone_id": SettingsManager.get_settings().BROWSER_TIMEZONE,
"color_scheme": "no-preference",
Expand Down