diff --git a/poetry.lock b/poetry.lock index d3b16a61..88b8bd81 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "annotated-types" @@ -95,19 +95,19 @@ test = ["dateparser (==1.*)", "pre-commit", "pytest", "pytest-cov", "pytest-mock [[package]] name = "attrs" -version = "24.2.0" +version = "24.3.0" description = "Classes Without Boilerplate" optional = true -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, - {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, + {file = "attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308"}, + {file = "attrs-24.3.0.tar.gz", hash = "sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff"}, ] [package.extras] benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] @@ -405,13 +405,13 @@ virtualenv = ["virtualenv (>=20.0.35)"] [[package]] name = "certifi" -version = "2024.8.30" +version = "2024.12.14" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, - {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, + {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, + {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, ] [[package]] @@ -765,16 +765,17 @@ toml = ["tomli"] [[package]] name = "crawlee" -version = "0.4.5" +version = "0.5.0b17" description = "Crawlee for Python" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "crawlee-0.4.5-py3-none-any.whl", hash = "sha256:cbb730bbd9fd08671344af9ff46af2be6e50a3e57011b82e58782c058f8c76c5"}, - {file = "crawlee-0.4.5.tar.gz", hash = "sha256:c657eeb59d5471791e20801be23699cd1f03ad7883f126adedb6b3ed008c281d"}, + {file = "crawlee-0.5.0b17-py3-none-any.whl", hash = "sha256:6c1a3a01271aa49fe45f56cfc8bf3f3d23217119a638317df50ef2006215cb2b"}, + {file = "crawlee-0.5.0b17.tar.gz", hash = "sha256:0ba0f253ee246ee0873fbdff6353a051953d84df70f19dc21bf18de350c80fa4"}, ] [package.dependencies] +apify = ">=2.0.0" colorama = ">=0.4.0" cookiecutter = ">=2.6.0" docutils = ">=0.21.0" @@ -784,17 +785,16 @@ inquirer = ">=3.3.0" more_itertools = ">=10.2.0" psutil = ">=6.0.0" pydantic = ">=2.8.1,<2.10.0 || >2.10.0,<2.10.1 || >2.10.1,<2.10.2 || >2.10.2" -pydantic-settings = ">=2.2.0" +pydantic-settings = ">=2.2.0,<2.7.0" pyee = ">=9.0.0" sortedcollections = ">=2.1.0" tldextract = ">=5.1.0" typer = ">=0.12.0" typing-extensions = ">=4.1.0" -yarl = ">=1.18.0,<2.0.0" +yarl = ">=1.18.0" [package.extras] -all = ["apify (>=2.0.0)", "beautifulsoup4 (>=4.12.0)", "curl-cffi (>=0.7.2)", "html5lib (>=1.0)", "lxml (>=5.2.0)", "playwright (>=1.27.0)"] -apify = ["apify (>=2.0.0)"] +all = ["beautifulsoup4 (>=4.12.0)", "curl-cffi (>=0.7.2)", "html5lib (>=1.0)", "lxml (>=5.2.0)", "parsel (>=1.9.0)", "playwright (>=1.27.0)"] beautifulsoup = ["beautifulsoup4 (>=4.12.0)", "html5lib (>=1.0)", "lxml (>=5.2.0)"] curl-impersonate = ["curl-cffi (>=0.7.2)"] parsel = ["parsel (>=1.9.0)"] @@ -2288,13 +2288,13 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pydantic-settings" -version = "2.7.0" +version = "2.6.1" description = "Settings management using Pydantic" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_settings-2.7.0-py3-none-any.whl", hash = "sha256:e00c05d5fa6cbbb227c84bd7487c5c1065084119b750df7c8c1a554aed236eb5"}, - {file = "pydantic_settings-2.7.0.tar.gz", hash = "sha256:ac4bfd4a36831a48dbf8b2d9325425b549a0a6f18cea118436d728eb4f1c4d66"}, + {file = "pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87"}, + {file = "pydantic_settings-2.6.1.tar.gz", hash = "sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0"}, ] [package.dependencies] @@ -3537,4 +3537,4 @@ scrapy = ["scrapy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "008371392c5d2baf886b2529e3227434280d1d37122f0fee6a19e53451682fbb" +content-hash = "7f0b717e7923859101faac6faf147f05ec62d59ba6282bf55d8bd7edae42801b" diff --git a/pyproject.toml b/pyproject.toml index e5638dcd..26c76e12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ keywords = [ python = "^3.9" apify-client = ">=1.8.1" apify-shared = ">=1.2.1" -crawlee = "~0.4.0" +crawlee = "==0.5.0b17" cryptography = ">=42.0.0" # TODO: relax the upper bound once the issue is resolved: # https://github.com/apify/apify-sdk-python/issues/348 @@ -53,6 +53,8 @@ httpx = "~0.27.0" lazy-object-proxy = ">=1.10.0" scrapy = { version = ">=2.11.0", optional = true } typing-extensions = ">=4.1.0" +# TODO: relax the upper bound once the issue is resolved: +# https://github.com/apify/apify-sdk-python/issues/325 websockets = ">=10.0 <14.0.0" [tool.poetry.group.dev.dependencies] diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 4f076a7a..6c2f9064 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -12,8 +12,9 @@ from apify_client import ApifyClientAsync from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value -from crawlee import service_container +from crawlee import service_locator from crawlee.events._types import Event, EventPersistStateData +from crawlee.memory_storage_client import MemoryStorageClient from apify._configuration import Configuration from apify._consts import EVENT_LISTENERS_TIMEOUT @@ -69,17 +70,31 @@ def __init__( self._configure_logging = configure_logging self._apify_client = self.new_client() - self._event_manager: EventManager - if self._configuration.is_at_home: - self._event_manager = PlatformEventManager( + # We need to keep both local & cloud storage clients because of the `force_cloud` option. + self._local_storage_client = MemoryStorageClient.from_config() + self._cloud_storage_client = ApifyStorageClient(configuration=self._configuration) + + # Set the event manager based on whether the Actor is running on the platform or locally. + self._event_manager = ( + PlatformEventManager( config=self._configuration, persist_state_interval=self._configuration.persist_state_interval, ) - else: - self._event_manager = LocalEventManager( + if self.is_at_home() + else LocalEventManager( system_info_interval=self._configuration.system_info_interval, persist_state_interval=self._configuration.persist_state_interval, ) + ) + + # Register services in the service locator. + if self.is_at_home(): + service_locator.set_storage_client(self._cloud_storage_client) + else: + service_locator.set_storage_client(self._local_storage_client) + + service_locator.set_event_manager(self.event_manager) + service_locator.set_configuration(self.configuration) self._is_initialized = False @@ -93,7 +108,7 @@ async def __aenter__(self) -> Self: executing the block code, the `Actor.fail` method is called. """ if self._configure_logging: - _configure_logging(self._configuration) + _configure_logging() await self.init() return self @@ -182,16 +197,6 @@ async def init(self) -> None: if self._is_initialized: raise RuntimeError('The Actor was already initialized!') - if self._configuration.token: - service_container.set_cloud_storage_client(ApifyStorageClient(configuration=self._configuration)) - - if self._configuration.is_at_home: - service_container.set_default_storage_client_type('cloud') - else: - service_container.set_default_storage_client_type('local') - - service_container.set_event_manager(self._event_manager) - self._is_exiting = False self._was_final_persist_state_emitted = False @@ -243,7 +248,6 @@ async def finalize() -> None: await self._event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout) await self._event_manager.__aexit__(None, None, None) - cast(dict, service_container._services).clear() # noqa: SLF001 await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds()) self._is_initialized = False @@ -347,11 +351,13 @@ async def open_dataset( self._raise_if_not_initialized() self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud) + storage_client = self._cloud_storage_client if force_cloud else service_locator.get_storage_client() + return await Dataset.open( id=id, name=name, configuration=self._configuration, - storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None), + storage_client=storage_client, ) async def open_key_value_store( @@ -379,12 +385,13 @@ async def open_key_value_store( """ self._raise_if_not_initialized() self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud) + storage_client = self._cloud_storage_client if force_cloud else service_locator.get_storage_client() return await KeyValueStore.open( id=id, name=name, configuration=self._configuration, - storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None), + storage_client=storage_client, ) async def open_request_queue( @@ -415,11 +422,13 @@ async def open_request_queue( self._raise_if_not_initialized() self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud) + storage_client = self._cloud_storage_client if force_cloud else service_locator.get_storage_client() + return await RequestQueue.open( id=id, name=name, configuration=self._configuration, - storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None), + storage_client=storage_client, ) async def push_data(self, data: dict | list[dict]) -> None: @@ -941,7 +950,7 @@ async def create_proxy_configuration( password: str | None = None, groups: list[str] | None = None, country_code: str | None = None, - proxy_urls: list[str] | None = None, + proxy_urls: list[str | None] | None = None, new_url_function: _NewUrlFunction | None = None, ) -> ProxyConfiguration | None: """Create a ProxyConfiguration object with the passed proxy configuration. diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index 018a6e98..70e8ae7c 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -1,6 +1,7 @@ from __future__ import annotations from datetime import datetime, timedelta +from logging import getLogger from typing import Annotated, Any from pydantic import AliasChoices, BeforeValidator, Field @@ -12,6 +13,8 @@ from apify._utils import docs_group +logger = getLogger(__name__) + def _transform_to_list(value: Any) -> list[str] | None: if value is None: @@ -353,6 +356,11 @@ class Configuration(CrawleeConfiguration): ), ] = None + @classmethod + def get_global_configuration(cls) -> Configuration: + """Retrieve the global instance of the configuration. -# Monkey-patch the base class so that it works with the extended configuration -CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration # type: ignore[method-assign] + Mostly for the backwards compatibility. It is recommended to use the `service_locator.get_configuration()` + instead. + """ + return cls() diff --git a/src/apify/_proxy_configuration.py b/src/apify/_proxy_configuration.py index e564706c..6fa64f56 100644 --- a/src/apify/_proxy_configuration.py +++ b/src/apify/_proxy_configuration.py @@ -111,9 +111,9 @@ def __init__( password: str | None = None, groups: list[str] | None = None, country_code: str | None = None, - proxy_urls: list[str] | None = None, + proxy_urls: list[str | None] | None = None, new_url_function: _NewUrlFunction | None = None, - tiered_proxy_urls: list[list[str]] | None = None, + tiered_proxy_urls: list[list[str | None]] | None = None, _actor_config: Configuration | None = None, _apify_client: ApifyClientAsync | None = None, ) -> None: @@ -148,7 +148,7 @@ def __init__( ' "groups" or "country_code".' ) - if proxy_urls and any('apify.com' in url for url in proxy_urls): + if proxy_urls and any('apify.com' in (url or '') for url in proxy_urls): logger.warning( 'Some Apify proxy features may work incorrectly. Please consider setting up Apify properties ' 'instead of `proxy_urls`.\n' diff --git a/src/apify/log.py b/src/apify/log.py index 698474f6..970a37a6 100644 --- a/src/apify/log.py +++ b/src/apify/log.py @@ -1,14 +1,10 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING from apify_shared.utils import ignore_docs from crawlee._log_config import CrawleeLogFormatter, configure_logger, get_configured_log_level -if TYPE_CHECKING: - from apify import Configuration - # Name of the logger used throughout the library (resolves to 'apify') logger_name = __name__.split('.')[0] @@ -21,11 +17,11 @@ class ActorLogFormatter(CrawleeLogFormatter): # noqa: D101 (Inherited from pare pass -def _configure_logging(configuration: Configuration) -> None: +def _configure_logging() -> None: apify_client_logger = logging.getLogger('apify_client') - configure_logger(apify_client_logger, configuration, remove_old_handlers=True) + configure_logger(apify_client_logger, remove_old_handlers=True) - level = get_configured_log_level(configuration) + level = get_configured_log_level() # Keep apify_client logger quiet unless debug logging is requested if level > logging.DEBUG: @@ -42,4 +38,4 @@ def _configure_logging(configuration: Configuration) -> None: # Use configured log level for apify logger apify_logger = logging.getLogger('apify') - configure_logger(apify_logger, configuration, remove_old_handlers=True) + configure_logger(apify_logger, remove_old_handlers=True) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 060644bd..f5aca4ff 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -7,13 +7,15 @@ import sys import textwrap from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Protocol, cast +from typing import TYPE_CHECKING, Any, Callable, Protocol import pytest from filelock import FileLock from apify_client import ApifyClientAsync -from apify_shared.consts import ActorJobStatus, ActorSourceType +from apify_shared.consts import ActorJobStatus, ActorSourceType, ApifyEnvVars +from crawlee import service_locator +from crawlee.storages import _creation_management import apify._actor from ._utils import generate_unique_resource_name @@ -29,19 +31,67 @@ _SDK_ROOT_PATH = Path(__file__).parent.parent.parent.resolve() -@pytest.fixture(autouse=True) -def _reset_and_patch_default_instances() -> None: - """Reset the used singletons and patch the default storage client with a temporary directory. +@pytest.fixture +def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callable[[], None]: + """Prepare the testing environment by resetting the global state before each test. + + This fixture ensures that the global state of the package is reset to a known baseline before each test runs. + It also configures a temporary storage directory for test isolation. + + Args: + monkeypatch: Test utility provided by pytest for patching. + tmp_path: A unique temporary directory path provided by pytest for test isolation. - To isolate the tests, we need to reset the used singletons before each test case. We also patch the default - storage client with a tmp_path. + Returns: + A callable that prepares the test environment. """ - from crawlee import service_container - cast(dict, service_container._services).clear() - delattr(apify._actor.Actor, '__wrapped__') + def _prepare_test_env() -> None: + delattr(apify._actor.Actor, '__wrapped__') + + # Set the environment variable for the local storage directory to the temporary path. + monkeypatch.setenv(ApifyEnvVars.LOCAL_STORAGE_DIR, str(tmp_path)) + + # Reset the flags in the service locator to indicate that no services are explicitly set. This ensures + # a clean state, as services might have been set during a previous test and not reset properly. + service_locator._configuration_was_set = False + service_locator._storage_client_was_set = False + service_locator._event_manager_was_set = False + + # Reset the services in the service locator. + service_locator._configuration = None + service_locator._event_manager = None + service_locator._storage_client = None + + # Clear creation-related caches to ensure no state is carried over between tests. + monkeypatch.setattr(_creation_management, '_cache_dataset_by_id', {}) + monkeypatch.setattr(_creation_management, '_cache_dataset_by_name', {}) + monkeypatch.setattr(_creation_management, '_cache_kvs_by_id', {}) + monkeypatch.setattr(_creation_management, '_cache_kvs_by_name', {}) + monkeypatch.setattr(_creation_management, '_cache_rq_by_id', {}) + monkeypatch.setattr(_creation_management, '_cache_rq_by_name', {}) + + # Verify that the test environment was set up correctly. + assert os.environ.get(ApifyEnvVars.LOCAL_STORAGE_DIR) == str(tmp_path) + assert service_locator._configuration_was_set is False + assert service_locator._storage_client_was_set is False + assert service_locator._event_manager_was_set is False + + return _prepare_test_env + + +@pytest.fixture(autouse=True) +def _isolate_test_environment(prepare_test_env: Callable[[], None]) -> None: + """Isolate the testing environment by resetting global state before and after each test. + + This fixture ensures that each test starts with a clean slate and that any modifications during the test + do not affect subsequent tests. It runs automatically for all tests. + + Args: + prepare_test_env: Fixture to prepare the environment before each test. + """ - # TODO: StorageClientManager local storage client purge # noqa: TD003 + prepare_test_env() @pytest.fixture diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 1d7b7660..24c352cb 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -2,16 +2,19 @@ import asyncio import inspect +import os from collections import defaultdict from copy import deepcopy -from typing import TYPE_CHECKING, Any, Callable, cast, get_type_hints +from typing import TYPE_CHECKING, Any, Callable, get_type_hints import pytest -from apify_client.client import ApifyClientAsync +from apify_client import ApifyClientAsync from apify_shared.consts import ApifyEnvVars +from crawlee import service_locator from crawlee.configuration import Configuration as CrawleeConfiguration from crawlee.memory_storage_client import MemoryStorageClient +from crawlee.storages import _creation_management import apify._actor @@ -20,45 +23,66 @@ @pytest.fixture -def reset_default_instances() -> Callable[[], None]: - def reset() -> None: - from crawlee.storages._creation_management import ( - _cache_dataset_by_id, - _cache_dataset_by_name, - _cache_kvs_by_id, - _cache_kvs_by_name, - _cache_rq_by_id, - _cache_rq_by_name, - ) - - _cache_dataset_by_id.clear() - _cache_dataset_by_name.clear() - _cache_kvs_by_id.clear() - _cache_kvs_by_name.clear() - _cache_rq_by_id.clear() - _cache_rq_by_name.clear() - - from crawlee import service_container - - cast(dict, service_container._services).clear() +def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callable[[], None]: + """Prepare the testing environment by resetting the global state before each test. + + This fixture ensures that the global state of the package is reset to a known baseline before each test runs. + It also configures a temporary storage directory for test isolation. + + Args: + monkeypatch: Test utility provided by pytest for patching. + tmp_path: A unique temporary directory path provided by pytest for test isolation. + + Returns: + A callable that prepares the test environment. + """ + + def _prepare_test_env() -> None: delattr(apify._actor.Actor, '__wrapped__') - # TODO: local storage client purge # noqa: TD003 - return reset + # Set the environment variable for the local storage directory to the temporary path. + monkeypatch.setenv(ApifyEnvVars.LOCAL_STORAGE_DIR, str(tmp_path)) + + # Reset the flags in the service locator to indicate that no services are explicitly set. This ensures + # a clean state, as services might have been set during a previous test and not reset properly. + service_locator._configuration_was_set = False + service_locator._storage_client_was_set = False + service_locator._event_manager_was_set = False + + # Reset the services in the service locator. + service_locator._configuration = None + service_locator._event_manager = None + service_locator._storage_client = None + + # Clear creation-related caches to ensure no state is carried over between tests. + monkeypatch.setattr(_creation_management, '_cache_dataset_by_id', {}) + monkeypatch.setattr(_creation_management, '_cache_dataset_by_name', {}) + monkeypatch.setattr(_creation_management, '_cache_kvs_by_id', {}) + monkeypatch.setattr(_creation_management, '_cache_kvs_by_name', {}) + monkeypatch.setattr(_creation_management, '_cache_rq_by_id', {}) + monkeypatch.setattr(_creation_management, '_cache_rq_by_name', {}) + + # Verify that the test environment was set up correctly. + assert os.environ.get(ApifyEnvVars.LOCAL_STORAGE_DIR) == str(tmp_path) + assert service_locator._configuration_was_set is False + assert service_locator._storage_client_was_set is False + assert service_locator._event_manager_was_set is False + + return _prepare_test_env -# To isolate the tests, we need to reset the used singletons before each test case -# We also set the MemoryStorageClient to use a temp path @pytest.fixture(autouse=True) -def _reset_and_patch_default_instances( - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, - reset_default_instances: Callable[[], None], -) -> None: - # This forces the MemoryStorageClient to use tmp_path for its storage dir - monkeypatch.setenv(ApifyEnvVars.LOCAL_STORAGE_DIR, str(tmp_path)) +def _isolate_test_environment(prepare_test_env: Callable[[], None]) -> None: + """Isolate the testing environment by resetting global state before and after each test. + + This fixture ensures that each test starts with a clean slate and that any modifications during the test + do not affect subsequent tests. It runs automatically for all tests. + + Args: + prepare_test_env: Fixture to prepare the environment before each test. + """ - reset_default_instances() + prepare_test_env() # This class is used to patch the ApifyClientAsync methods to return a fixed value or be replaced with another method. @@ -179,4 +203,4 @@ def memory_storage_client() -> MemoryStorageClient: configuration.persist_storage = True configuration.write_metadata = True - return MemoryStorageClient(configuration) + return MemoryStorageClient.from_config(configuration) diff --git a/tests/unit/test_proxy_configuration.py b/tests/unit/test_proxy_configuration.py index fa2fd53b..57450897 100644 --- a/tests/unit/test_proxy_configuration.py +++ b/tests/unit/test_proxy_configuration.py @@ -153,7 +153,7 @@ async def test_new_url_with_session_ids() -> None: async def test_rotating_custom_urls() -> None: - proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] + proxy_urls: list[str | None] = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) assert await proxy_configuration.new_url() == proxy_urls[0] @@ -166,7 +166,7 @@ async def test_rotating_custom_urls() -> None: async def test_rotating_custom_urls_with_sessions() -> None: sessions = ['sesssion_01', 'sesssion_02', 'sesssion_03', 'sesssion_04', 'sesssion_05', 'sesssion_06'] - proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] + proxy_urls: list[str | None] = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) @@ -239,18 +239,14 @@ def custom_new_url_function(session_id: str | None = None, request: Any = None) async def test_url_reference_not_shared_between_instances() -> None: - urls = [ + proxy_urls: list[str | None] = [ 'http://proxy-example-1.com:8000', 'http://proxy-example-2.com:8000', ] - proxy_configuration_1 = ProxyConfiguration( - proxy_urls=urls, - ) + proxy_configuration_1 = ProxyConfiguration(proxy_urls=proxy_urls) - urls.append('http://proxy-example-3.com:8000') - proxy_configuration_2 = ProxyConfiguration( - proxy_urls=urls, - ) + proxy_urls.append('http://proxy-example-3.com:8000') + proxy_configuration_2 = ProxyConfiguration(proxy_urls=proxy_urls) assert proxy_configuration_1 is not None assert proxy_configuration_2 is not None @@ -296,7 +292,7 @@ async def test_new_proxy_info_basic_construction() -> None: async def test_new_proxy_info_rotating_urls() -> None: - proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] + proxy_urls: list[str | None] = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls) proxy_info = await proxy_configuration.new_proxy_info() @@ -326,7 +322,7 @@ async def test_new_proxy_info_rotating_urls() -> None: async def test_new_proxy_info_rotating_urls_with_sessions() -> None: sessions = ['sesssion_01', 'sesssion_02', 'sesssion_03', 'sesssion_04', 'sesssion_05', 'sesssion_06'] - proxy_urls = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] + proxy_urls: list[str | None] = ['http://proxy.com:1111', 'http://proxy.com:2222', 'http://proxy.com:3333'] proxy_configuration = ProxyConfiguration(proxy_urls=proxy_urls)