Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions src/crawlee/browsers/_playwright_browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
import shutil
import tempfile
from logging import getLogger
from pathlib import Path
from typing import TYPE_CHECKING, Any

from playwright.async_api import Browser
from typing_extensions import override

if TYPE_CHECKING:
from pathlib import Path

from playwright.async_api import BrowserContext, BrowserType, CDPSession, Page

logger = getLogger(__name__)
Expand All @@ -36,7 +35,7 @@ def __init__(
self._browser_type = browser_type
self._browser_launch_options = browser_launch_options
self._user_data_dir = user_data_dir
self._temp_dir: str | None = None
self._temp_dir: Path | None = None

self._context: BrowserContext | None = None
self._is_connected = True
Expand All @@ -63,7 +62,7 @@ async def new_context(self, **context_options: Any) -> BrowserContext:
user_data_dir = self._user_data_dir
else:
user_data_dir = tempfile.mkdtemp(prefix=self._TMP_DIR_PREFIX)
self._temp_dir = user_data_dir
self._temp_dir = Path(user_data_dir)

self._context = await self._browser_type.launch_persistent_context(
user_data_dir=user_data_dir, **launch_options
Expand All @@ -74,9 +73,9 @@ async def new_context(self, **context_options: Any) -> BrowserContext:

return self._context

async def _delete_temp_dir(self, _: BrowserContext) -> None:
if self._temp_dir:
await asyncio.to_thread(shutil.rmtree, self._temp_dir)
async def _delete_temp_dir(self, _: BrowserContext | None) -> None:
    """Remove the temporary user data directory, if one was created and still exists.

    Args:
        _: Ignored. Accepted so that the method can be invoked with a browser
            context (or `None`) as its single argument.
    """
    temp_dir = self._temp_dir
    if not temp_dir or not temp_dir.exists():
        # Nothing was created, or it is already gone.
        return

    # `shutil.rmtree` blocks, so it runs in a worker thread. Removal is best-effort:
    # errors raised while deleting are ignored.
    await asyncio.to_thread(shutil.rmtree, temp_dir, ignore_errors=True)

@override
async def close(self, **kwargs: Any) -> None:
Expand All @@ -85,6 +84,8 @@ async def close(self, **kwargs: Any) -> None:
await self._context.close()
self._context = None
self._is_connected = False
await asyncio.sleep(0.1)
await self._delete_temp_dir(self._context)

@property
@override
Expand Down
44 changes: 44 additions & 0 deletions tests/unit/browsers/test_playwright_browser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

import pytest
from playwright.async_api import async_playwright

from crawlee.browsers._playwright_browser import PlaywrightPersistentBrowser

if TYPE_CHECKING:
from collections.abc import AsyncGenerator

from playwright.async_api import Playwright


@pytest.fixture
async def playwright() -> AsyncGenerator[Playwright, None]:
    """Yield a Playwright instance that stays inside its `async_playwright()` context for the test."""
    async with async_playwright() as pw:
        yield pw


async def test_init(playwright: Playwright) -> None:
    """A newly constructed persistent browser stores its arguments and reports itself connected."""
    chromium = playwright.chromium
    browser = PlaywrightPersistentBrowser(chromium, user_data_dir=None, browser_launch_options={})

    # The browser type is kept as given and exposed through the public property.
    assert browser._browser_type == chromium
    assert browser.browser_type == chromium

    # Launch options and the user data directory are stored unchanged.
    assert browser._browser_launch_options == {}
    assert browser._user_data_dir is None

    # No temporary directory is recorded right after construction.
    assert browser._temp_dir is None

    # The connection flag starts out set, both internally and via the accessor.
    assert browser._is_connected is True
    assert browser.is_connected() is True


async def test_delete_temp_folder_with_close_browser(playwright: Playwright) -> None:
    """Closing the browser removes the temporary user data directory recorded in `_temp_dir`."""
    persist_browser = PlaywrightPersistentBrowser(
        playwright.chromium, user_data_dir=None, browser_launch_options={'headless': True}
    )
    # No `user_data_dir` is supplied, so the test expects `new_context` to record a temporary one.
    await persist_browser.new_context()
    try:
        current_temp_dir = persist_browser._temp_dir
        assert isinstance(current_temp_dir, Path)
        assert current_temp_dir.exists()
    finally:
        # Always close, so a failed assertion above does not leave the launched browser
        # (and its temporary directory) behind for the rest of the test session.
        await persist_browser.close()

    assert not current_temp_dir.exists()
Loading