From 70ac5e0bbed044660d07c5b21f33afc22b62373b Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Fri, 24 Oct 2025 10:53:22 +0200 Subject: [PATCH 1/7] Restructure the tests --- .../src => actor}/__init__.py | 0 .../{ => actor}/actor_source_base/Dockerfile | 0 .../actor_source_base/requirements.txt | 0 .../{ => actor}/actor_source_base/server.py | 0 .../actor/actor_source_base/src/__init__.py | 0 .../actor_source_base/src/__main__.py | 0 .../{ => actor}/actor_source_base/src/main.py | 0 tests/integration/actor/conftest.py | 322 +++++++++++++++ .../{ => actor}/test_actor_api_helpers.py | 2 +- .../{ => actor}/test_actor_call_timeouts.py | 0 .../{ => actor}/test_actor_charge.py | 0 .../test_actor_create_proxy_configuration.py | 0 .../{ => actor}/test_actor_dataset.py | 2 +- .../{ => actor}/test_actor_events.py | 0 .../{ => actor}/test_actor_key_value_store.py | 2 +- .../{ => actor}/test_actor_lifecycle.py | 0 .../integration/{ => actor}/test_actor_log.py | 18 +- .../{ => actor}/test_actor_request_queue.py | 242 ++++++----- .../{ => actor}/test_actor_scrapy.py | 0 .../integration/actor/test_apify_storages.py | 28 ++ .../test_crawlers_with_storages.py | 0 .../integration/{ => actor}/test_fixtures.py | 0 tests/integration/apify_api/__init__.py | 0 tests/integration/apify_api/conftest.py | 70 ++++ .../{ => apify_api}/test_apify_storages.py | 27 -- .../{ => apify_api}/test_request_queue.py | 249 +++++------ tests/integration/conftest.py | 391 +----------------- 27 files changed, 679 insertions(+), 674 deletions(-) rename tests/integration/{actor_source_base/src => actor}/__init__.py (100%) rename tests/integration/{ => actor}/actor_source_base/Dockerfile (100%) rename tests/integration/{ => actor}/actor_source_base/requirements.txt (100%) rename tests/integration/{ => actor}/actor_source_base/server.py (100%) create mode 100644 tests/integration/actor/actor_source_base/src/__init__.py rename tests/integration/{ => actor}/actor_source_base/src/__main__.py (100%) rename 
tests/integration/{ => actor}/actor_source_base/src/main.py (100%) create mode 100644 tests/integration/actor/conftest.py rename tests/integration/{ => actor}/test_actor_api_helpers.py (99%) rename tests/integration/{ => actor}/test_actor_call_timeouts.py (100%) rename tests/integration/{ => actor}/test_actor_charge.py (100%) rename tests/integration/{ => actor}/test_actor_create_proxy_configuration.py (100%) rename tests/integration/{ => actor}/test_actor_dataset.py (99%) rename tests/integration/{ => actor}/test_actor_events.py (100%) rename tests/integration/{ => actor}/test_actor_key_value_store.py (99%) rename tests/integration/{ => actor}/test_actor_lifecycle.py (100%) rename tests/integration/{ => actor}/test_actor_log.py (86%) rename tests/integration/{ => actor}/test_actor_request_queue.py (72%) rename tests/integration/{ => actor}/test_actor_scrapy.py (100%) create mode 100644 tests/integration/actor/test_apify_storages.py rename tests/integration/{ => actor}/test_crawlers_with_storages.py (100%) rename tests/integration/{ => actor}/test_fixtures.py (100%) create mode 100644 tests/integration/apify_api/__init__.py create mode 100644 tests/integration/apify_api/conftest.py rename tests/integration/{ => apify_api}/test_apify_storages.py (82%) rename tests/integration/{ => apify_api}/test_request_queue.py (88%) diff --git a/tests/integration/actor_source_base/src/__init__.py b/tests/integration/actor/__init__.py similarity index 100% rename from tests/integration/actor_source_base/src/__init__.py rename to tests/integration/actor/__init__.py diff --git a/tests/integration/actor_source_base/Dockerfile b/tests/integration/actor/actor_source_base/Dockerfile similarity index 100% rename from tests/integration/actor_source_base/Dockerfile rename to tests/integration/actor/actor_source_base/Dockerfile diff --git a/tests/integration/actor_source_base/requirements.txt b/tests/integration/actor/actor_source_base/requirements.txt similarity index 100% rename from 
tests/integration/actor_source_base/requirements.txt rename to tests/integration/actor/actor_source_base/requirements.txt diff --git a/tests/integration/actor_source_base/server.py b/tests/integration/actor/actor_source_base/server.py similarity index 100% rename from tests/integration/actor_source_base/server.py rename to tests/integration/actor/actor_source_base/server.py diff --git a/tests/integration/actor/actor_source_base/src/__init__.py b/tests/integration/actor/actor_source_base/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/actor_source_base/src/__main__.py b/tests/integration/actor/actor_source_base/src/__main__.py similarity index 100% rename from tests/integration/actor_source_base/src/__main__.py rename to tests/integration/actor/actor_source_base/src/__main__.py diff --git a/tests/integration/actor_source_base/src/main.py b/tests/integration/actor/actor_source_base/src/main.py similarity index 100% rename from tests/integration/actor_source_base/src/main.py rename to tests/integration/actor/actor_source_base/src/main.py diff --git a/tests/integration/actor/conftest.py b/tests/integration/actor/conftest.py new file mode 100644 index 00000000..2d5205b1 --- /dev/null +++ b/tests/integration/actor/conftest.py @@ -0,0 +1,322 @@ +from __future__ import annotations + +import base64 +import inspect +import os +import subprocess +import sys +import textwrap +from pathlib import Path +from typing import TYPE_CHECKING, Any, Protocol + +import pytest +from filelock import FileLock + +from apify_client import ApifyClient, ApifyClientAsync +from apify_shared.consts import ActorJobStatus, ActorSourceType + +from .._utils import generate_unique_resource_name +from apify._models import ActorRun + +if TYPE_CHECKING: + from collections.abc import Awaitable, Callable, Coroutine, Iterator, Mapping + from decimal import Decimal + + from apify_client.clients.resource_clients import ActorClientAsync + +_TOKEN_ENV_VAR = 
'APIFY_TEST_USER_API_TOKEN' +_API_URL_ENV_VAR = 'APIFY_INTEGRATION_TESTS_API_URL' +_SDK_ROOT_PATH = Path(__file__).parent.parent.parent.parent.resolve() + + +@pytest.fixture(scope='session') +def sdk_wheel_path(tmp_path_factory: pytest.TempPathFactory, testrun_uid: str) -> Path: + """Build the package wheel if it hasn't been built yet, and return the path to the wheel.""" + # Make sure the wheel is not being built concurrently across all the pytest-xdist runners, + # through locking the building process with a temp file. + with FileLock(tmp_path_factory.getbasetemp().parent / 'sdk_wheel_build.lock'): + # Make sure the wheel is built exactly once across across all the pytest-xdist runners, + # through an indicator file saying that the wheel was already built. + was_wheel_built_this_test_run_file = tmp_path_factory.getbasetemp() / f'wheel_was_built_in_run_{testrun_uid}' + if not was_wheel_built_this_test_run_file.exists(): + subprocess.run( + args='python -m build', + cwd=_SDK_ROOT_PATH, + shell=True, + check=True, + capture_output=True, + ) + was_wheel_built_this_test_run_file.touch() + + # Read the current package version, necessary for getting the right wheel filename. + pyproject_toml_file = (_SDK_ROOT_PATH / 'pyproject.toml').read_text(encoding='utf-8') + for line in pyproject_toml_file.splitlines(): + if line.startswith('version = '): + delim = '"' if '"' in line else "'" + sdk_version = line.split(delim)[1] + break + else: + raise RuntimeError('Unable to find version string.') + + wheel_path = _SDK_ROOT_PATH / 'dist' / f'apify-{sdk_version}-py3-none-any.whl' + + # Just to be sure. + assert wheel_path.exists() + + return wheel_path + + +@pytest.fixture(scope='session') +def actor_base_source_files(sdk_wheel_path: Path) -> dict[str, str | bytes]: + """Create a dictionary of the base source files for a testing Actor. 
+ + It takes the files from `tests/integration/actor_source_base`, builds the Apify SDK wheel from + the current codebase, and adds them all together in a dictionary. + """ + source_files: dict[str, str | bytes] = {} + + # First read the actor_source_base files + actor_source_base_path = _SDK_ROOT_PATH / 'tests/integration/actor/actor_source_base' + + for path in actor_source_base_path.glob('**/*'): + if not path.is_file(): + continue + relative_path = str(path.relative_to(actor_source_base_path)) + try: + source_files[relative_path] = path.read_text(encoding='utf-8') + except ValueError: + source_files[relative_path] = path.read_bytes() + + sdk_wheel_file_name = sdk_wheel_path.name + source_files[sdk_wheel_file_name] = sdk_wheel_path.read_bytes() + + source_files['requirements.txt'] = str(source_files['requirements.txt']).replace( + 'APIFY_SDK_WHEEL_PLACEHOLDER', f'./{sdk_wheel_file_name}' + ) + + current_major_minor_python_version = '.'.join([str(x) for x in sys.version_info[:2]]) + integration_tests_python_version = ( + os.getenv('INTEGRATION_TESTS_PYTHON_VERSION') or current_major_minor_python_version + ) + source_files['Dockerfile'] = str(source_files['Dockerfile']).replace( + 'BASE_IMAGE_VERSION_PLACEHOLDER', integration_tests_python_version + ) + + return source_files + + +class MakeActorFunction(Protocol): + """A type for the `make_actor` fixture.""" + + def __call__( + self, + label: str, + *, + main_func: Callable | None = None, + main_py: str | None = None, + source_files: Mapping[str, str | bytes] | None = None, + additional_requirements: list[str] | None = None, + ) -> Awaitable[ActorClientAsync]: + """Create a temporary Actor from the given main function or source files. + + The Actor will be uploaded to the Apify Platform, built there, and after the test finishes, it will + be automatically deleted. + + You have to pass exactly one of the `main_func`, `main_py` and `source_files` arguments. 
+ + Args: + label: The label which will be a part of the generated Actor name. + main_func: The main function of the Actor. + main_py: The `src/main.py` file of the Actor. + source_files: A dictionary of the source files of the Actor. + additional_requirements: A list of additional requirements to be added to the `requirements.txt`. + + Returns: + A resource client for the created Actor. + """ + + +@pytest.fixture(scope='session') +def make_actor( + actor_base_source_files: dict[str, str | bytes], + apify_token: str, +) -> Iterator[MakeActorFunction]: + """Fixture for creating temporary Actors for testing purposes. + + This returns a function that creates a temporary Actor from the given main function or source files. The Actor + will be uploaded to the Apify Platform, built there, and after the test finishes, it will be automatically deleted. + """ + actors_for_cleanup: list[str] = [] + + async def _make_actor( + label: str, + *, + main_func: Callable | None = None, + main_py: str | None = None, + source_files: Mapping[str, str | bytes] | None = None, + additional_requirements: list[str] | None = None, + ) -> ActorClientAsync: + if not (main_func or main_py or source_files): + raise TypeError('One of `main_func`, `main_py` or `source_files` arguments must be specified') + + if (main_func and main_py) or (main_func and source_files) or (main_py and source_files): + raise TypeError('Cannot specify more than one of `main_func`, `main_py` and `source_files` arguments') + + client = ApifyClientAsync(token=apify_token, api_url=os.getenv(_API_URL_ENV_VAR)) + actor_name = generate_unique_resource_name(label) + + # Get the source of main_func and convert it into a reasonable main_py file. 
+ if main_func: + func_source = textwrap.dedent(inspect.getsource(main_func)) + func_source = func_source.replace(f'def {main_func.__name__}(', 'def main(') + main_py = '\n'.join( # noqa: FLY002 + [ + 'import asyncio', + '', + 'from apify import Actor', + '', + '', + '', + func_source, + ] + ) + + if main_py: + source_files = {'src/main.py': main_py} + + assert source_files is not None + + # Copy the source files dict from the fixture so that we're not overwriting it, and merge the passed + # argument in it. + actor_source_files = actor_base_source_files.copy() + actor_source_files.update(source_files) + + if additional_requirements: + # Get the current requirements.txt content (as a string). + req_content = actor_source_files.get('requirements.txt', '') + if isinstance(req_content, bytes): + req_content = req_content.decode('utf-8') + # Append the additional requirements, each on a new line. + additional_reqs = '\n'.join(additional_requirements) + req_content = req_content.strip() + '\n' + additional_reqs + '\n' + actor_source_files['requirements.txt'] = req_content + + # Reformat the source files in a format that the Apify API understands. 
+ source_files_for_api = [] + for file_name, file_contents in actor_source_files.items(): + if isinstance(file_contents, str): + file_format = 'TEXT' + if file_name.endswith('.py'): + file_contents = textwrap.dedent(file_contents).lstrip() # noqa: PLW2901 + else: + file_format = 'BASE64' + file_contents = base64.b64encode(file_contents).decode('utf-8') # noqa: PLW2901 + + source_files_for_api.append( + { + 'name': file_name, + 'format': file_format, + 'content': file_contents, + } + ) + + print(f'Creating Actor {actor_name}...') + created_actor = await client.actors().create( + name=actor_name, + default_run_build='latest', + default_run_memory_mbytes=256, + default_run_timeout_secs=600, + versions=[ + { + 'versionNumber': '0.0', + 'buildTag': 'latest', + 'sourceType': ActorSourceType.SOURCE_FILES, + 'sourceFiles': source_files_for_api, + } + ], + ) + + actor_client = client.actor(created_actor['id']) + + print(f'Building Actor {actor_name}...') + build_result = await actor_client.build(version_number='0.0') + build_client = client.build(build_result['id']) + build_client_result = await build_client.wait_for_finish(wait_secs=600) + + assert build_client_result is not None + assert build_client_result['status'] == ActorJobStatus.SUCCEEDED + + # We only mark the client for cleanup if the build succeeded, so that if something goes wrong here, + # you have a chance to check the error. + actors_for_cleanup.append(created_actor['id']) + + return actor_client + + yield _make_actor + + # Delete all the generated Actors. 
+ for actor_id in actors_for_cleanup: + actor_client = ApifyClient(token=apify_token, api_url=os.getenv(_API_URL_ENV_VAR)).actor(actor_id) + + if (actor := actor_client.get()) is not None: + actor_client.update( + pricing_infos=[ + *actor.get('pricingInfos', []), + { + 'pricingModel': 'FREE', + }, + ] + ) + + actor_client.delete() + + +class RunActorFunction(Protocol): + """A type for the `run_actor` fixture.""" + + def __call__( + self, + actor: ActorClientAsync, + *, + run_input: Any = None, + max_total_charge_usd: Decimal | None = None, + ) -> Coroutine[None, None, ActorRun]: + """Initiate an Actor run and wait for its completion. + + Args: + actor: Actor async client, in testing context usually created by `make_actor` fixture. + run_input: Optional input for the Actor run. + + Returns: + Actor run result. + """ + + +@pytest.fixture(scope='session') +def run_actor(apify_client_async: ApifyClientAsync) -> RunActorFunction: + """Fixture for calling an Actor run and waiting for its completion. + + This fixture returns a function that initiates an Actor run with optional run input, waits for its completion, + and retrieves the final result. It uses the `wait_for_finish` method with a timeout of 10 minutes. + """ + + async def _run_actor( + actor: ActorClientAsync, + *, + run_input: Any = None, + max_total_charge_usd: Decimal | None = None, + ) -> ActorRun: + call_result = await actor.call( + run_input=run_input, + max_total_charge_usd=max_total_charge_usd, + ) + + assert isinstance(call_result, dict), 'The result of ActorClientAsync.call() is not a dictionary.' + assert 'id' in call_result, 'The result of ActorClientAsync.call() does not contain an ID.' 
+ + run_client = apify_client_async.run(call_result['id']) + run_result = await run_client.wait_for_finish(wait_secs=600) + + return ActorRun.model_validate(run_result) + + return _run_actor diff --git a/tests/integration/test_actor_api_helpers.py b/tests/integration/actor/test_actor_api_helpers.py similarity index 99% rename from tests/integration/test_actor_api_helpers.py rename to tests/integration/actor/test_actor_api_helpers.py index c46cb8de..d6b42a99 100644 --- a/tests/integration/test_actor_api_helpers.py +++ b/tests/integration/actor/test_actor_api_helpers.py @@ -6,7 +6,7 @@ from crawlee._utils.crypto import crypto_random_object_id -from ._utils import generate_unique_resource_name +from .._utils import generate_unique_resource_name from apify import Actor from apify._models import ActorRun diff --git a/tests/integration/test_actor_call_timeouts.py b/tests/integration/actor/test_actor_call_timeouts.py similarity index 100% rename from tests/integration/test_actor_call_timeouts.py rename to tests/integration/actor/test_actor_call_timeouts.py diff --git a/tests/integration/test_actor_charge.py b/tests/integration/actor/test_actor_charge.py similarity index 100% rename from tests/integration/test_actor_charge.py rename to tests/integration/actor/test_actor_charge.py diff --git a/tests/integration/test_actor_create_proxy_configuration.py b/tests/integration/actor/test_actor_create_proxy_configuration.py similarity index 100% rename from tests/integration/test_actor_create_proxy_configuration.py rename to tests/integration/actor/test_actor_create_proxy_configuration.py diff --git a/tests/integration/test_actor_dataset.py b/tests/integration/actor/test_actor_dataset.py similarity index 99% rename from tests/integration/test_actor_dataset.py rename to tests/integration/actor/test_actor_dataset.py index c80bb342..409df584 100644 --- a/tests/integration/test_actor_dataset.py +++ b/tests/integration/actor/test_actor_dataset.py @@ -4,7 +4,7 @@ from 
apify_shared.consts import ApifyEnvVars -from ._utils import generate_unique_resource_name +from .._utils import generate_unique_resource_name from apify import Actor if TYPE_CHECKING: diff --git a/tests/integration/test_actor_events.py b/tests/integration/actor/test_actor_events.py similarity index 100% rename from tests/integration/test_actor_events.py rename to tests/integration/actor/test_actor_events.py diff --git a/tests/integration/test_actor_key_value_store.py b/tests/integration/actor/test_actor_key_value_store.py similarity index 99% rename from tests/integration/test_actor_key_value_store.py rename to tests/integration/actor/test_actor_key_value_store.py index 19d63b0f..2ed9af29 100644 --- a/tests/integration/test_actor_key_value_store.py +++ b/tests/integration/actor/test_actor_key_value_store.py @@ -4,7 +4,7 @@ from apify_shared.consts import ApifyEnvVars -from ._utils import generate_unique_resource_name +from .._utils import generate_unique_resource_name from apify import Actor if TYPE_CHECKING: diff --git a/tests/integration/test_actor_lifecycle.py b/tests/integration/actor/test_actor_lifecycle.py similarity index 100% rename from tests/integration/test_actor_lifecycle.py rename to tests/integration/actor/test_actor_lifecycle.py diff --git a/tests/integration/test_actor_log.py b/tests/integration/actor/test_actor_log.py similarity index 86% rename from tests/integration/test_actor_log.py rename to tests/integration/actor/test_actor_log.py index c3596841..e721a9d4 100644 --- a/tests/integration/test_actor_log.py +++ b/tests/integration/actor/test_actor_log.py @@ -2,17 +2,12 @@ from typing import TYPE_CHECKING -import pytest - from apify import Actor, __version__ if TYPE_CHECKING: from .conftest import MakeActorFunction, RunActorFunction -# TODO: What to do with the `browserforge` output? 
-# https://github.com/apify/apify-sdk-python/issues/423 -@pytest.mark.skip async def test_actor_logging( make_actor: MakeActorFunction, run_actor: RunActorFunction, @@ -64,12 +59,15 @@ async def main() -> None: run_log_lines = [line[25:] for line in run_log_lines] # This might be way too specific and easy to break, but let's hope not - assert run_log_lines.pop(0).startswith('ACTOR: Pulling Docker image') - assert run_log_lines.pop(0) == 'ACTOR: Creating Docker container.' - assert run_log_lines.pop(0) == 'ACTOR: Starting Docker container.' - assert run_log_lines.pop(0) == '[apify] INFO Initializing Actor...' + assert run_log_lines.pop(0).startswith('ACTOR: Pulling container image of build') + assert run_log_lines.pop(0) == 'ACTOR: Creating container.' + assert run_log_lines.pop(0) == 'ACTOR: Starting container.' + assert run_log_lines.pop(0) == ( + '[apify._configuration] WARN Actor is running on the Apify platform,' + ' `disable_browser_sandbox` was changed to True.' + ) assert run_log_lines.pop(0).startswith( - f'[apify] INFO System info ({{"apify_sdk_version": "{__version__}", "apify_client_version": "' + f'[apify] INFO Initializing Actor ({{"apify_sdk_version": "{__version__}", "apify_client_version": "' ) assert run_log_lines.pop(0) == '[apify] DEBUG Debug message' assert run_log_lines.pop(0) == '[apify] INFO Info message' diff --git a/tests/integration/test_actor_request_queue.py b/tests/integration/actor/test_actor_request_queue.py similarity index 72% rename from tests/integration/test_actor_request_queue.py rename to tests/integration/actor/test_actor_request_queue.py index 3a9053c7..9b61fe51 100644 --- a/tests/integration/test_actor_request_queue.py +++ b/tests/integration/actor/test_actor_request_queue.py @@ -2,41 +2,19 @@ import asyncio import logging -from typing import TYPE_CHECKING, Any -from unittest import mock +from typing import TYPE_CHECKING import pytest -from apify_shared.consts import ApifyEnvVars - -from ._utils import 
generate_unique_resource_name +from .._utils import generate_unique_resource_name from apify import Actor, Request from apify._models import ActorRun -from apify.storage_clients import ApifyStorageClient -from apify.storages import RequestQueue if TYPE_CHECKING: - from collections.abc import AsyncGenerator - from apify_client import ApifyClientAsync from .conftest import MakeActorFunction, RunActorFunction - - -@pytest.fixture(params=['single', 'shared']) -async def apify_named_rq( - apify_client_async: ApifyClientAsync, monkeypatch: pytest.MonkeyPatch, request: pytest.FixtureRequest -) -> AsyncGenerator[RequestQueue]: - assert apify_client_async.token - monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_client_async.token) - request_queue_name = generate_unique_resource_name('request_queue') - - async with Actor: - request_queue = await RequestQueue.open( - name=request_queue_name, storage_client=ApifyStorageClient(request_queue_access=request.param) - ) - yield request_queue - await request_queue.drop() + from apify.storages import RequestQueue async def test_same_references_in_default_rq( @@ -101,22 +79,6 @@ async def test_force_cloud( assert request_queue_request['url'] == 'http://example.com' -async def test_request_queue_is_finished( - apify_named_rq: RequestQueue, -) -> None: - await apify_named_rq.add_request(Request.from_url('http://example.com')) - assert not await apify_named_rq.is_finished() - - request = await apify_named_rq.fetch_next_request() - assert request is not None - assert not await apify_named_rq.is_finished(), ( - 'RequestQueue should not be finished unless the request is marked as handled.' 
- ) - - await apify_named_rq.mark_request_as_handled(request) - assert await apify_named_rq.is_finished() - - async def test_request_queue_deduplication( make_actor: MakeActorFunction, run_actor: RunActorFunction, @@ -130,8 +92,6 @@ async def test_request_queue_deduplication( """ async def main() -> None: - import asyncio - from apify import Actor, Request async with Actor: @@ -179,8 +139,6 @@ async def test_request_queue_deduplication_use_extended_unique_key( """ async def main() -> None: - import asyncio - from apify import Actor, Request async with Actor: @@ -228,9 +186,6 @@ async def test_request_queue_parallel_deduplication( third worker adding 10 new requests and 20 known requests and so on""" async def main() -> None: - import asyncio - import logging - from apify import Actor, Request worker_count = 10 @@ -274,55 +229,6 @@ async def add_requests_worker() -> None: assert run_result.status == 'SUCCEEDED' -async def test_request_queue_deduplication_unprocessed_requests( - apify_named_rq: RequestQueue, -) -> None: - """Test that the deduplication does not add unprocessed requests to the cache. - - In this test the first call is "hardcoded" to fail, even on all retries, so it never even sends the API request and - thus has no chance of increasing the `writeCount`. The second call can increase the `writeCount` only if it is not - cached, as cached requests do not make the call (tested in other tests). 
So this means the `unprocessedRequests` - request was intentionally not cached.""" - logging.getLogger('apify.storage_clients._apify._request_queue_client').setLevel(logging.DEBUG) - - await asyncio.sleep(10) # Wait to be sure that metadata are updated - - # Get raw client, because stats are not exposed in `RequestQueue` class, but are available in raw client - rq_client = Actor.apify_client.request_queue(request_queue_id=apify_named_rq.id) - _rq = await rq_client.get() - assert _rq - stats_before = _rq.get('stats', {}) - Actor.log.info(stats_before) - - def return_unprocessed_requests(requests: list[dict], *_: Any, **__: Any) -> dict[str, list[dict]]: - """Simulate API returning unprocessed requests.""" - return { - 'processedRequests': [], - 'unprocessedRequests': [ - {'url': request['url'], 'uniqueKey': request['uniqueKey'], 'method': request['method']} - for request in requests - ], - } - - with mock.patch( - 'apify_client.clients.resource_clients.request_queue.RequestQueueClientAsync.batch_add_requests', - side_effect=return_unprocessed_requests, - ): - # Simulate failed API call for adding requests. Request was not processed and should not be cached. - await apify_named_rq.add_requests(['http://example.com/1']) - - # This will succeed. - await apify_named_rq.add_requests(['http://example.com/1']) - - await asyncio.sleep(10) # Wait to be sure that metadata are updated - _rq = await rq_client.get() - assert _rq - stats_after = _rq.get('stats', {}) - Actor.log.info(stats_after) - - assert (stats_after['writeCount'] - stats_before['writeCount']) == 1 - - async def test_request_queue_had_multiple_clients_platform( make_actor: MakeActorFunction, run_actor: RunActorFunction, @@ -489,3 +395,145 @@ async def main() -> None: run_result = await run_actor(actor) assert run_result.status == 'SUCCEEDED' + + +@pytest.mark.skip( + reason='The Apify RQ client is not resilient to concurrent processing, making this test flaky. See issue #529.' 
+) +async def test_concurrent_processing_simulation( + make_actor: MakeActorFunction, + run_actor: RunActorFunction, +) -> None: + """Test simulation of concurrent request processing.""" + + async def main() -> None: + async with Actor: + rq = await Actor.open_request_queue() + Actor.log.info('Request queue opened') + + # Add requests for concurrent processing + for i in range(20): + await rq.add_request(f'https://example.com/concurrent/{i}') + Actor.log.info('Added 20 requests for concurrent processing') + + total_count = await rq.get_total_count() + assert total_count == 20, f'total_count={total_count}' + + # Simulate concurrent workers + async def worker() -> int: + processed = 0 + + while request := await rq.fetch_next_request(): + # Simulate some work + await asyncio.sleep(0.01) + + # Randomly reclaim some requests (simulate failures) + if processed % 7 == 0 and processed > 0: # Reclaim every 7th request + await rq.reclaim_request(request) + else: + await rq.mark_request_as_handled(request) + processed += 1 + + return processed + + # Run multiple workers concurrently + workers = [worker() for _ in range(3)] + results = await asyncio.gather(*workers) + + total_processed = sum(results) + Actor.log.info(f'Total processed by workers: {total_processed}') + Actor.log.info(f'Individual worker results: {results}') + + # Verify that workers processed some requests + assert total_processed > 0, f'total_processed={total_processed}' + assert len(results) == 3, f'len(results)={len(results)}' + + # Check queue state after concurrent processing + handled_after_workers = await rq.get_handled_count() + assert handled_after_workers == total_processed, ( + f'handled_after_workers={handled_after_workers}', + f'total_processed={total_processed}', + ) + + total_after_workers = await rq.get_total_count() + assert total_after_workers == 20, f'total_after_workers={total_after_workers}' + + # Process any remaining reclaimed requests + remaining_count = 0 + while not await 
rq.is_finished(): + request = await rq.fetch_next_request() + if request: + remaining_count += 1 + await rq.mark_request_as_handled(request) + else: + break + + Actor.log.info(f'Processed {remaining_count} remaining requests') + + # Verify final state + final_handled = await rq.get_handled_count() + final_total = await rq.get_total_count() + assert final_handled == 20, f'final_handled={final_handled}' + assert final_total == 20, f'final_total={final_total}' + assert total_processed + remaining_count == 20, ( + f'total_processed={total_processed}', + f'remaining_count={remaining_count}', + ) + + is_finished = await rq.is_finished() + assert is_finished is True, f'is_finished={is_finished}' + + actor = await make_actor(label='rq-concurrent-test', main_func=main) + run_result = await run_actor(actor) + assert run_result.status == 'SUCCEEDED' + + +async def test_rq_isolation( + make_actor: MakeActorFunction, + run_actor: RunActorFunction, +) -> None: + """Test that different request queues are properly isolated.""" + + async def main() -> None: + async with Actor: + # Get the unique actor name for creating unique queue names + actor_name = Actor.configuration.actor_id + + # Open multiple queues with unique names + rq1 = await Actor.open_request_queue(name=f'{actor_name}-rq-1') + rq2 = await Actor.open_request_queue(name=f'{actor_name}-rq-2') + Actor.log.info('Opened two separate named queues with unique names') + + # Verify they are different instances + assert rq1 is not rq2, f'rq1 is rq2={rq1 is rq2}' + Actor.log.info('Verified queues are different instances') + + # Add different requests to each queue + await rq1.add_request('https://example.com/queue1-request') + await rq2.add_request('https://example.com/queue2-request') + Actor.log.info('Added different requests to each queue') + + # Verify isolation + req1 = await rq1.fetch_next_request() + req2 = await rq2.fetch_next_request() + + assert req1 is not None, f'req1={req1}' + assert 'queue1' in req1.url, 
f'req1.url={req1.url}' + assert req2 is not None, f'req2={req2}' + assert 'queue2' in req2.url, f'req2.url={req2.url}' + Actor.log.info(f'Queue 1 request: {req1.url}') + Actor.log.info(f'Queue 2 request: {req2.url}') + Actor.log.info('Queue isolation verified successfully') + + # Clean up + await rq1.mark_request_as_handled(req1) + await rq2.mark_request_as_handled(req2) + + # Drop queues + await rq1.drop() + await rq2.drop() + Actor.log.info('Dropped both queues') + + actor = await make_actor(label='rq-isolation-test', main_func=main) + run_result = await run_actor(actor) + assert run_result.status == 'SUCCEEDED' diff --git a/tests/integration/test_actor_scrapy.py b/tests/integration/actor/test_actor_scrapy.py similarity index 100% rename from tests/integration/test_actor_scrapy.py rename to tests/integration/actor/test_actor_scrapy.py diff --git a/tests/integration/actor/test_apify_storages.py b/tests/integration/actor/test_apify_storages.py new file mode 100644 index 00000000..f3f3696a --- /dev/null +++ b/tests/integration/actor/test_apify_storages.py @@ -0,0 +1,28 @@ +from .conftest import MakeActorFunction, RunActorFunction + + +async def test_actor_full_explicit_storage_init_on_platform( + make_actor: MakeActorFunction, run_actor: RunActorFunction +) -> None: + async def main() -> None: + from crawlee import service_locator + + from apify import Actor + from apify.storage_clients import ApifyStorageClient, MemoryStorageClient, SmartApifyStorageClient + + service_locator.set_storage_client( + SmartApifyStorageClient( + local_storage_client=MemoryStorageClient(), + cloud_storage_client=ApifyStorageClient(request_queue_access='shared'), + ) + ) + async with Actor: + # Storages should be same as the cloud client is used on the platform + assert await Actor.open_dataset() is await Actor.open_dataset(force_cloud=True) + assert await Actor.open_key_value_store() is await Actor.open_key_value_store(force_cloud=True) + assert await Actor.open_request_queue() is await 
Actor.open_request_queue(force_cloud=True) + + actor = await make_actor(label='explicit_storage_init', main_func=main) + run_result = await run_actor(actor) + + assert run_result.status == 'SUCCEEDED' diff --git a/tests/integration/test_crawlers_with_storages.py b/tests/integration/actor/test_crawlers_with_storages.py similarity index 100% rename from tests/integration/test_crawlers_with_storages.py rename to tests/integration/actor/test_crawlers_with_storages.py diff --git a/tests/integration/test_fixtures.py b/tests/integration/actor/test_fixtures.py similarity index 100% rename from tests/integration/test_fixtures.py rename to tests/integration/actor/test_fixtures.py diff --git a/tests/integration/apify_api/__init__.py b/tests/integration/apify_api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/apify_api/conftest.py b/tests/integration/apify_api/conftest.py new file mode 100644 index 00000000..ac085da3 --- /dev/null +++ b/tests/integration/apify_api/conftest.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import os +from typing import TYPE_CHECKING + +import pytest + +from apify_shared.consts import ApifyEnvVars +from crawlee import service_locator + +import apify._actor +from apify.storage_clients._apify._utils import AliasResolver + +if TYPE_CHECKING: + from collections.abc import Callable + from pathlib import Path + + +@pytest.fixture +def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callable[[], None]: + """Prepare the testing environment by resetting the global state before each test. + + This fixture ensures that the global state of the package is reset to a known baseline before each test runs. + It also configures a temporary storage directory for test isolation. + + Args: + monkeypatch: Test utility provided by pytest for patching. + tmp_path: A unique temporary directory path provided by pytest for test isolation. + + Returns: + A callable that prepares the test environment. 
+ """ + + def _prepare_test_env() -> None: + # Reset the Actor class state. + apify._actor.Actor.__wrapped__.__class__._is_any_instance_initialized = False # type: ignore[attr-defined] + apify._actor.Actor.__wrapped__.__class__._is_rebooting = False # type: ignore[attr-defined] + delattr(apify._actor.Actor, '__wrapped__') + + # Set the environment variable for the local storage directory to the temporary path. + monkeypatch.setenv(ApifyEnvVars.LOCAL_STORAGE_DIR, str(tmp_path)) + + # Reset the services in the service locator. + service_locator._configuration = None + service_locator._event_manager = None + service_locator._storage_client = None + service_locator.storage_instance_manager.clear_cache() + + # Reset the AliasResolver class state. + AliasResolver._alias_map = {} + AliasResolver._alias_init_lock = None + + # Verify that the test environment was set up correctly. + assert os.environ.get(ApifyEnvVars.LOCAL_STORAGE_DIR) == str(tmp_path) + + return _prepare_test_env + + +@pytest.fixture(autouse=True) +def _isolate_test_environment(prepare_test_env: Callable[[], None]) -> None: + """Isolate the testing environment by resetting global state before and after each test. + + This fixture ensures that each test starts with a clean slate and that any modifications during the test + do not affect subsequent tests. It runs automatically for all tests. + + Args: + prepare_test_env: Fixture to prepare the environment before each test. 
+ """ + + prepare_test_env() diff --git a/tests/integration/test_apify_storages.py b/tests/integration/apify_api/test_apify_storages.py similarity index 82% rename from tests/integration/test_apify_storages.py rename to tests/integration/apify_api/test_apify_storages.py index 32cb5061..57826325 100644 --- a/tests/integration/test_apify_storages.py +++ b/tests/integration/apify_api/test_apify_storages.py @@ -5,7 +5,6 @@ from crawlee import service_locator from crawlee.storages import Dataset, KeyValueStore, RequestQueue -from .conftest import MakeActorFunction, RunActorFunction from apify import Actor, Configuration from apify.storage_clients import ApifyStorageClient, MemoryStorageClient, SmartApifyStorageClient @@ -124,29 +123,3 @@ async def test_actor_implicit_storage_init(apify_token: str) -> None: assert await Actor.open_dataset() is not await Actor.open_dataset(force_cloud=True) assert await Actor.open_key_value_store() is not await Actor.open_key_value_store(force_cloud=True) assert await Actor.open_request_queue() is not await Actor.open_request_queue(force_cloud=True) - - -async def test_actor_full_explicit_storage_init_on_platform( - make_actor: MakeActorFunction, run_actor: RunActorFunction -) -> None: - async def main() -> None: - from crawlee import service_locator - - from apify.storage_clients import ApifyStorageClient, MemoryStorageClient, SmartApifyStorageClient - - service_locator.set_storage_client( - SmartApifyStorageClient( - local_storage_client=MemoryStorageClient(), - cloud_storage_client=ApifyStorageClient(request_queue_access='shared'), - ) - ) - async with Actor: - # Storages should be same as the cloud client is used on the platform - assert await Actor.open_dataset() is await Actor.open_dataset(force_cloud=True) - assert await Actor.open_key_value_store() is await Actor.open_key_value_store(force_cloud=True) - assert await Actor.open_request_queue() is await Actor.open_request_queue(force_cloud=True) - - actor = await 
make_actor(label='explicit_storage_init', main_func=main) - run_result = await run_actor(actor) - - assert run_result.status == 'SUCCEEDED' diff --git a/tests/integration/test_request_queue.py b/tests/integration/apify_api/test_request_queue.py similarity index 88% rename from tests/integration/test_request_queue.py rename to tests/integration/apify_api/test_request_queue.py index 6b6ba9eb..6489ea0b 100644 --- a/tests/integration/test_request_queue.py +++ b/tests/integration/apify_api/test_request_queue.py @@ -1,8 +1,10 @@ from __future__ import annotations import asyncio +import logging from datetime import datetime, timezone -from typing import TYPE_CHECKING, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, cast +from unittest import mock import pytest @@ -10,20 +12,34 @@ from crawlee import service_locator from crawlee.crawlers import BasicCrawler -from ._utils import generate_unique_resource_name +from .._utils import generate_unique_resource_name from apify import Actor, Request from apify.storage_clients import ApifyStorageClient from apify.storage_clients._apify._utils import unique_key_to_request_id from apify.storages import RequestQueue if TYPE_CHECKING: + from collections.abc import AsyncGenerator + from apify_client import ApifyClientAsync from crawlee._types import BasicCrawlingContext - from .conftest import MakeActorFunction, RunActorFunction from apify.storage_clients._apify._models import ApifyRequestQueueMetadata +@pytest.fixture(params=['single', 'shared']) +async def request_queue_apify( + apify_token: str, monkeypatch: pytest.MonkeyPatch, request: pytest.FixtureRequest +) -> AsyncGenerator[RequestQueue]: + """Create an instance of the Apify request queue on the platform and drop it when the test is finished.""" + monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_token) + + async with Actor: + rq = await RequestQueue.open(storage_client=ApifyStorageClient(request_queue_access=request.param)) + yield rq + await rq.drop() + + async def 
test_add_and_fetch_requests(request_queue_apify: RequestQueue) -> None: """Test basic functionality of adding and fetching requests.""" @@ -599,99 +615,6 @@ async def test_mixed_string_and_request_objects(request_queue_apify: RequestQueu Actor.log.info('Mixed types verified - found request object with user_data') -@pytest.mark.skip( - reason='The Apify RQ client is not resilient to concurrent processing, making this test flaky. See issue #529.' -) -async def test_concurrent_processing_simulation( - make_actor: MakeActorFunction, - run_actor: RunActorFunction, -) -> None: - """Test simulation of concurrent request processing.""" - - async def main() -> None: - import asyncio - - async with Actor: - rq = await Actor.open_request_queue() - Actor.log.info('Request queue opened') - - # Add requests for concurrent processing - for i in range(20): - await rq.add_request(f'https://example.com/concurrent/{i}') - Actor.log.info('Added 20 requests for concurrent processing') - - total_count = await rq.get_total_count() - assert total_count == 20, f'total_count={total_count}' - - # Simulate concurrent workers - async def worker() -> int: - processed = 0 - - while request := await rq.fetch_next_request(): - # Simulate some work - await asyncio.sleep(0.01) - - # Randomly reclaim some requests (simulate failures) - if processed % 7 == 0 and processed > 0: # Reclaim every 7th request - await rq.reclaim_request(request) - else: - await rq.mark_request_as_handled(request) - processed += 1 - - return processed - - # Run multiple workers concurrently - workers = [worker() for _ in range(3)] - results = await asyncio.gather(*workers) - - total_processed = sum(results) - Actor.log.info(f'Total processed by workers: {total_processed}') - Actor.log.info(f'Individual worker results: {results}') - - # Verify that workers processed some requests - assert total_processed > 0, f'total_processed={total_processed}' - assert len(results) == 3, f'len(results)={len(results)}' - - # Check queue 
state after concurrent processing - handled_after_workers = await rq.get_handled_count() - assert handled_after_workers == total_processed, ( - f'handled_after_workers={handled_after_workers}', - f'total_processed={total_processed}', - ) - - total_after_workers = await rq.get_total_count() - assert total_after_workers == 20, f'total_after_workers={total_after_workers}' - - # Process any remaining reclaimed requests - remaining_count = 0 - while not await rq.is_finished(): - request = await rq.fetch_next_request() - if request: - remaining_count += 1 - await rq.mark_request_as_handled(request) - else: - break - - Actor.log.info(f'Processed {remaining_count} remaining requests') - - # Verify final state - final_handled = await rq.get_handled_count() - final_total = await rq.get_total_count() - assert final_handled == 20, f'final_handled={final_handled}' - assert final_total == 20, f'final_total={final_total}' - assert total_processed + remaining_count == 20, ( - f'total_processed={total_processed}', - f'remaining_count={remaining_count}', - ) - - is_finished = await rq.is_finished() - assert is_finished is True, f'is_finished={is_finished}' - - actor = await make_actor(label='rq-concurrent-test', main_func=main) - run_result = await run_actor(actor) - assert run_result.status == 'SUCCEEDED' - - async def test_persistence_across_operations(request_queue_apify: RequestQueue) -> None: """Test that queue state persists across different operations.""" @@ -848,57 +771,6 @@ async def test_request_ordering_with_mixed_operations(request_queue_apify: Reque Actor.log.info('Request ordering verified successfully') -async def test_rq_isolation( - make_actor: MakeActorFunction, - run_actor: RunActorFunction, -) -> None: - """Test that different request queues are properly isolated.""" - - async def main() -> None: - async with Actor: - # Get the unique actor name for creating unique queue names - actor_name = Actor.configuration.actor_id - - # Open multiple queues with unique 
names - rq1 = await Actor.open_request_queue(name=f'{actor_name}-rq-1') - rq2 = await Actor.open_request_queue(name=f'{actor_name}-rq-2') - Actor.log.info('Opened two separate named queues with unique names') - - # Verify they are different instances - assert rq1 is not rq2, f'rq1 is rq2={rq1 is rq2}' - Actor.log.info('Verified queues are different instances') - - # Add different requests to each queue - await rq1.add_request('https://example.com/queue1-request') - await rq2.add_request('https://example.com/queue2-request') - Actor.log.info('Added different requests to each queue') - - # Verify isolation - req1 = await rq1.fetch_next_request() - req2 = await rq2.fetch_next_request() - - assert req1 is not None, f'req1={req1}' - assert 'queue1' in req1.url, f'req1.url={req1.url}' - assert req2 is not None, f'req2={req2}' - assert 'queue2' in req2.url, f'req2.url={req2.url}' - Actor.log.info(f'Queue 1 request: {req1.url}') - Actor.log.info(f'Queue 2 request: {req2.url}') - Actor.log.info('Queue isolation verified successfully') - - # Clean up - await rq1.mark_request_as_handled(req1) - await rq2.mark_request_as_handled(req2) - - # Drop queues - await rq1.drop() - await rq2.drop() - Actor.log.info('Dropped both queues') - - actor = await make_actor(label='rq-isolation-test', main_func=main) - run_result = await run_actor(actor) - assert run_result.status == 'SUCCEEDED' - - async def test_finished_state_accuracy(request_queue_apify: RequestQueue) -> None: """Test accuracy of is_finished() method in various scenarios.""" @@ -1237,3 +1109,86 @@ async def test_pre_existing_request_with_user_data( assert request_obtained is not None # Test that custom_data is preserved in user_data (custom_data should be subset of obtained user_data) assert custom_data.items() <= request_obtained.user_data.items() + + +async def test_force_cloud( + apify_client_async: ApifyClientAsync, + request_queue_apify: RequestQueue, +) -> None: + request_queue_id = (await 
request_queue_apify.get_metadata()).id + request_info = await request_queue_apify.add_request(Request.from_url('http://example.com')) + assert request_info.id is not None + request_queue_client = apify_client_async.request_queue(request_queue_id) + + request_queue_details = await request_queue_client.get() + assert request_queue_details is not None + assert request_queue_details.get('name') == request_queue_apify.name + + request_queue_request = await request_queue_client.get_request(request_info.id) + assert request_queue_request is not None + assert request_queue_request['url'] == 'http://example.com' + + +async def test_request_queue_is_finished( + request_queue_apify: RequestQueue, +) -> None: + await request_queue_apify.add_request(Request.from_url('http://example.com')) + assert not await request_queue_apify.is_finished() + + request = await request_queue_apify.fetch_next_request() + assert request is not None + assert not await request_queue_apify.is_finished(), ( + 'RequestQueue should not be finished unless the request is marked as handled.' + ) + + await request_queue_apify.mark_request_as_handled(request) + assert await request_queue_apify.is_finished() + + +async def test_request_queue_deduplication_unprocessed_requests( + request_queue_apify: RequestQueue, +) -> None: + """Test that the deduplication does not add unprocessed requests to the cache. + + In this test the first call is "hardcoded" to fail, even on all retries, so it never even sends the API request and + thus has no chance of increasing the `writeCount`. The second call can increase the `writeCount` only if it is not + cached, as cached requests do not make the call (tested in other tests). 
So this means the `unprocessedRequests` + request was intentionally not cached.""" + logging.getLogger('apify.storage_clients._apify._request_queue_client').setLevel(logging.DEBUG) + + await asyncio.sleep(10) # Wait to be sure that metadata are updated + + # Get raw client, because stats are not exposed in `RequestQueue` class, but are available in raw client + rq_client = Actor.apify_client.request_queue(request_queue_id=request_queue_apify.id) + _rq = await rq_client.get() + assert _rq + stats_before = _rq.get('stats', {}) + Actor.log.info(stats_before) + + def return_unprocessed_requests(requests: list[dict], *_: Any, **__: Any) -> dict[str, list[dict]]: + """Simulate API returning unprocessed requests.""" + return { + 'processedRequests': [], + 'unprocessedRequests': [ + {'url': request['url'], 'uniqueKey': request['uniqueKey'], 'method': request['method']} + for request in requests + ], + } + + with mock.patch( + 'apify_client.clients.resource_clients.request_queue.RequestQueueClientAsync.batch_add_requests', + side_effect=return_unprocessed_requests, + ): + # Simulate failed API call for adding requests. Request was not processed and should not be cached. + await request_queue_apify.add_requests(['http://example.com/1']) + + # This will succeed. 
+ await request_queue_apify.add_requests(['http://example.com/1']) + + await asyncio.sleep(10) # Wait to be sure that metadata are updated + _rq = await rq_client.get() + assert _rq + stats_after = _rq.get('stats', {}) + Actor.log.info(stats_after) + + assert (stats_after['writeCount'] - stats_before['writeCount']) == 1 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index aea770db..5e597be7 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,92 +1,13 @@ from __future__ import annotations -import base64 -import inspect import os -import subprocess -import sys -import textwrap -from pathlib import Path -from typing import TYPE_CHECKING, Any, Protocol import pytest -from filelock import FileLock -from apify_client import ApifyClient, ApifyClientAsync -from apify_shared.consts import ActorJobStatus, ActorSourceType, ApifyEnvVars -from crawlee import service_locator - -import apify._actor -from ._utils import generate_unique_resource_name -from apify import Actor -from apify._models import ActorRun -from apify.storage_clients import ApifyStorageClient -from apify.storage_clients._apify._utils import AliasResolver -from apify.storages import RequestQueue - -if TYPE_CHECKING: - from collections.abc import AsyncGenerator, Awaitable, Callable, Coroutine, Iterator, Mapping - from decimal import Decimal - - from apify_client.clients.resource_clients import ActorClientAsync +from apify_client import ApifyClientAsync _TOKEN_ENV_VAR = 'APIFY_TEST_USER_API_TOKEN' _API_URL_ENV_VAR = 'APIFY_INTEGRATION_TESTS_API_URL' -_SDK_ROOT_PATH = Path(__file__).parent.parent.parent.resolve() - - -@pytest.fixture -def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callable[[], None]: - """Prepare the testing environment by resetting the global state before each test. - - This fixture ensures that the global state of the package is reset to a known baseline before each test runs. 
- It also configures a temporary storage directory for test isolation. - - Args: - monkeypatch: Test utility provided by pytest for patching. - tmp_path: A unique temporary directory path provided by pytest for test isolation. - - Returns: - A callable that prepares the test environment. - """ - - def _prepare_test_env() -> None: - # Reset the Actor class state. - apify._actor.Actor.__wrapped__.__class__._is_any_instance_initialized = False # type: ignore[attr-defined] - apify._actor.Actor.__wrapped__.__class__._is_rebooting = False # type: ignore[attr-defined] - delattr(apify._actor.Actor, '__wrapped__') - - # Set the environment variable for the local storage directory to the temporary path. - monkeypatch.setenv(ApifyEnvVars.LOCAL_STORAGE_DIR, str(tmp_path)) - - # Reset the services in the service locator. - service_locator._configuration = None - service_locator._event_manager = None - service_locator._storage_client = None - service_locator.storage_instance_manager.clear_cache() - - # Reset the AliasResolver class state. - AliasResolver._alias_map = {} - AliasResolver._alias_init_lock = None - - # Verify that the test environment was set up correctly. - assert os.environ.get(ApifyEnvVars.LOCAL_STORAGE_DIR) == str(tmp_path) - - return _prepare_test_env - - -@pytest.fixture(autouse=True) -def _isolate_test_environment(prepare_test_env: Callable[[], None]) -> None: - """Isolate the testing environment by resetting global state before and after each test. - - This fixture ensures that each test starts with a clean slate and that any modifications during the test - do not affect subsequent tests. It runs automatically for all tests. - - Args: - prepare_test_env: Fixture to prepare the environment before each test. 
- """ - - prepare_test_env() @pytest.fixture(scope='session') @@ -105,313 +26,3 @@ def apify_client_async(apify_token: str) -> ApifyClientAsync: api_url = os.getenv(_API_URL_ENV_VAR) return ApifyClientAsync(apify_token, api_url=api_url) - - -@pytest.fixture(params=['single', 'shared']) -async def request_queue_apify( - apify_token: str, monkeypatch: pytest.MonkeyPatch, request: pytest.FixtureRequest -) -> AsyncGenerator[RequestQueue]: - """Create an instance of the Apify request queue on the platform and drop it when the test is finished.""" - monkeypatch.setenv(ApifyEnvVars.TOKEN, apify_token) - - async with Actor: - rq = await RequestQueue.open(storage_client=ApifyStorageClient(request_queue_access=request.param)) - yield rq - await rq.drop() - - -@pytest.fixture(scope='session') -def sdk_wheel_path(tmp_path_factory: pytest.TempPathFactory, testrun_uid: str) -> Path: - """Build the package wheel if it hasn't been built yet, and return the path to the wheel.""" - # Make sure the wheel is not being built concurrently across all the pytest-xdist runners, - # through locking the building process with a temp file. - with FileLock(tmp_path_factory.getbasetemp().parent / 'sdk_wheel_build.lock'): - # Make sure the wheel is built exactly once across across all the pytest-xdist runners, - # through an indicator file saying that the wheel was already built. - was_wheel_built_this_test_run_file = tmp_path_factory.getbasetemp() / f'wheel_was_built_in_run_{testrun_uid}' - if not was_wheel_built_this_test_run_file.exists(): - subprocess.run( - args='python -m build', - cwd=_SDK_ROOT_PATH, - shell=True, - check=True, - capture_output=True, - ) - was_wheel_built_this_test_run_file.touch() - - # Read the current package version, necessary for getting the right wheel filename. 
- pyproject_toml_file = (_SDK_ROOT_PATH / 'pyproject.toml').read_text(encoding='utf-8') - for line in pyproject_toml_file.splitlines(): - if line.startswith('version = '): - delim = '"' if '"' in line else "'" - sdk_version = line.split(delim)[1] - break - else: - raise RuntimeError('Unable to find version string.') - - wheel_path = _SDK_ROOT_PATH / 'dist' / f'apify-{sdk_version}-py3-none-any.whl' - - # Just to be sure. - assert wheel_path.exists() - - return wheel_path - - -@pytest.fixture(scope='session') -def actor_base_source_files(sdk_wheel_path: Path) -> dict[str, str | bytes]: - """Create a dictionary of the base source files for a testing Actor. - - It takes the files from `tests/integration/actor_source_base`, builds the Apify SDK wheel from - the current codebase, and adds them all together in a dictionary. - """ - source_files: dict[str, str | bytes] = {} - - # First read the actor_source_base files - sdk_root_path = Path(__file__).parent.parent.parent.resolve() - actor_source_base_path = sdk_root_path / 'tests/integration/actor_source_base' - - for path in actor_source_base_path.glob('**/*'): - if not path.is_file(): - continue - relative_path = str(path.relative_to(actor_source_base_path)) - try: - source_files[relative_path] = path.read_text(encoding='utf-8') - except ValueError: - source_files[relative_path] = path.read_bytes() - - sdk_wheel_file_name = sdk_wheel_path.name - source_files[sdk_wheel_file_name] = sdk_wheel_path.read_bytes() - - source_files['requirements.txt'] = str(source_files['requirements.txt']).replace( - 'APIFY_SDK_WHEEL_PLACEHOLDER', f'./{sdk_wheel_file_name}' - ) - - current_major_minor_python_version = '.'.join([str(x) for x in sys.version_info[:2]]) - integration_tests_python_version = ( - os.getenv('INTEGRATION_TESTS_PYTHON_VERSION') or current_major_minor_python_version - ) - source_files['Dockerfile'] = str(source_files['Dockerfile']).replace( - 'BASE_IMAGE_VERSION_PLACEHOLDER', integration_tests_python_version - ) - - 
return source_files - - -class MakeActorFunction(Protocol): - """A type for the `make_actor` fixture.""" - - def __call__( - self, - label: str, - *, - main_func: Callable | None = None, - main_py: str | None = None, - source_files: Mapping[str, str | bytes] | None = None, - additional_requirements: list[str] | None = None, - ) -> Awaitable[ActorClientAsync]: - """Create a temporary Actor from the given main function or source files. - - The Actor will be uploaded to the Apify Platform, built there, and after the test finishes, it will - be automatically deleted. - - You have to pass exactly one of the `main_func`, `main_py` and `source_files` arguments. - - Args: - label: The label which will be a part of the generated Actor name. - main_func: The main function of the Actor. - main_py: The `src/main.py` file of the Actor. - source_files: A dictionary of the source files of the Actor. - additional_requirements: A list of additional requirements to be added to the `requirements.txt`. - - Returns: - A resource client for the created Actor. - """ - - -@pytest.fixture(scope='session') -def make_actor( - actor_base_source_files: dict[str, str | bytes], - apify_token: str, -) -> Iterator[MakeActorFunction]: - """Fixture for creating temporary Actors for testing purposes. - - This returns a function that creates a temporary Actor from the given main function or source files. The Actor - will be uploaded to the Apify Platform, built there, and after the test finishes, it will be automatically deleted. 
- """ - actors_for_cleanup: list[str] = [] - - async def _make_actor( - label: str, - *, - main_func: Callable | None = None, - main_py: str | None = None, - source_files: Mapping[str, str | bytes] | None = None, - additional_requirements: list[str] | None = None, - ) -> ActorClientAsync: - if not (main_func or main_py or source_files): - raise TypeError('One of `main_func`, `main_py` or `source_files` arguments must be specified') - - if (main_func and main_py) or (main_func and source_files) or (main_py and source_files): - raise TypeError('Cannot specify more than one of `main_func`, `main_py` and `source_files` arguments') - - client = ApifyClientAsync(token=apify_token, api_url=os.getenv(_API_URL_ENV_VAR)) - actor_name = generate_unique_resource_name(label) - - # Get the source of main_func and convert it into a reasonable main_py file. - if main_func: - func_source = textwrap.dedent(inspect.getsource(main_func)) - func_source = func_source.replace(f'def {main_func.__name__}(', 'def main(') - main_py = '\n'.join( # noqa: FLY002 - [ - 'import asyncio', - '', - 'from apify import Actor', - '', - '', - '', - func_source, - ] - ) - - if main_py: - source_files = {'src/main.py': main_py} - - assert source_files is not None - - # Copy the source files dict from the fixture so that we're not overwriting it, and merge the passed - # argument in it. - actor_source_files = actor_base_source_files.copy() - actor_source_files.update(source_files) - - if additional_requirements: - # Get the current requirements.txt content (as a string). - req_content = actor_source_files.get('requirements.txt', '') - if isinstance(req_content, bytes): - req_content = req_content.decode('utf-8') - # Append the additional requirements, each on a new line. 
- additional_reqs = '\n'.join(additional_requirements) - req_content = req_content.strip() + '\n' + additional_reqs + '\n' - actor_source_files['requirements.txt'] = req_content - - # Reformat the source files in a format that the Apify API understands. - source_files_for_api = [] - for file_name, file_contents in actor_source_files.items(): - if isinstance(file_contents, str): - file_format = 'TEXT' - if file_name.endswith('.py'): - file_contents = textwrap.dedent(file_contents).lstrip() # noqa: PLW2901 - else: - file_format = 'BASE64' - file_contents = base64.b64encode(file_contents).decode('utf-8') # noqa: PLW2901 - - source_files_for_api.append( - { - 'name': file_name, - 'format': file_format, - 'content': file_contents, - } - ) - - print(f'Creating Actor {actor_name}...') - created_actor = await client.actors().create( - name=actor_name, - default_run_build='latest', - default_run_memory_mbytes=256, - default_run_timeout_secs=600, - versions=[ - { - 'versionNumber': '0.0', - 'buildTag': 'latest', - 'sourceType': ActorSourceType.SOURCE_FILES, - 'sourceFiles': source_files_for_api, - } - ], - ) - - actor_client = client.actor(created_actor['id']) - - print(f'Building Actor {actor_name}...') - build_result = await actor_client.build(version_number='0.0') - build_client = client.build(build_result['id']) - build_client_result = await build_client.wait_for_finish(wait_secs=600) - - assert build_client_result is not None - assert build_client_result['status'] == ActorJobStatus.SUCCEEDED - - # We only mark the client for cleanup if the build succeeded, so that if something goes wrong here, - # you have a chance to check the error. - actors_for_cleanup.append(created_actor['id']) - - return actor_client - - yield _make_actor - - client = ApifyClient(token=apify_token, api_url=os.getenv(_API_URL_ENV_VAR)) - - # Delete all the generated Actors. 
- for actor_id in actors_for_cleanup: - actor_client = client.actor(actor_id) - - if (actor := actor_client.get()) is not None: - actor_client.update( - pricing_infos=[ - *actor.get('pricingInfos', []), - { - 'pricingModel': 'FREE', - }, - ] - ) - - actor_client.delete() - - -class RunActorFunction(Protocol): - """A type for the `run_actor` fixture.""" - - def __call__( - self, - actor: ActorClientAsync, - *, - run_input: Any = None, - max_total_charge_usd: Decimal | None = None, - ) -> Coroutine[None, None, ActorRun]: - """Initiate an Actor run and wait for its completion. - - Args: - actor: Actor async client, in testing context usually created by `make_actor` fixture. - run_input: Optional input for the Actor run. - - Returns: - Actor run result. - """ - - -@pytest.fixture(scope='session') -def run_actor(apify_token: str) -> RunActorFunction: - """Fixture for calling an Actor run and waiting for its completion. - - This fixture returns a function that initiates an Actor run with optional run input, waits for its completion, - and retrieves the final result. It uses the `wait_for_finish` method with a timeout of 10 minutes. - """ - - async def _run_actor( - actor: ActorClientAsync, - *, - run_input: Any = None, - max_total_charge_usd: Decimal | None = None, - ) -> ActorRun: - call_result = await actor.call( - run_input=run_input, - max_total_charge_usd=max_total_charge_usd, - ) - - assert isinstance(call_result, dict), 'The result of ActorClientAsync.call() is not a dictionary.' - assert 'id' in call_result, 'The result of ActorClientAsync.call() does not contain an ID.' 
- - client = ApifyClientAsync(token=apify_token, api_url=os.getenv(_API_URL_ENV_VAR)) - run_client = client.run(call_result['id']) - run_result = await run_client.wait_for_finish(wait_secs=600) - - return ActorRun.model_validate(run_result) - - return _run_actor From 103726bbcb7de501f3bde60fb2dfba830f03864c Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Fri, 24 Oct 2025 11:47:49 +0200 Subject: [PATCH 2/7] Fix missing import lost during the move --- tests/integration/actor/test_actor_request_queue.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/actor/test_actor_request_queue.py b/tests/integration/actor/test_actor_request_queue.py index 9b61fe51..d258482e 100644 --- a/tests/integration/actor/test_actor_request_queue.py +++ b/tests/integration/actor/test_actor_request_queue.py @@ -1,7 +1,6 @@ from __future__ import annotations import asyncio -import logging from typing import TYPE_CHECKING import pytest @@ -186,6 +185,8 @@ async def test_request_queue_parallel_deduplication( third worker adding 10 new requests and 20 known requests and so on""" async def main() -> None: + import logging + from apify import Actor, Request worker_count = 10 From 4ff953f88e8f798b2d8b015fb1bac580b4eed46f Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 29 Oct 2025 09:25:17 +0100 Subject: [PATCH 3/7] Update to match crawlee 1.0.4 --- pyproject.toml | 2 +- tests/integration/actor/test_actor_lifecycle.py | 6 ++++-- uv.lock | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 63597665..72938078 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ keywords = [ dependencies = [ "apify-client>=2.2.0,<3.0.0", "apify-shared>=2.0.0,<3.0.0", - "crawlee>=1.0.2,<2.0.0", + "crawlee>=1.0.4,<2.0.0", "cachetools>=5.5.0", "cryptography>=42.0.0", "impit>=0.6.1", diff --git a/tests/integration/actor/test_actor_lifecycle.py b/tests/integration/actor/test_actor_lifecycle.py index 
649dc1d1..ddd367a9 100644 --- a/tests/integration/actor/test_actor_lifecycle.py +++ b/tests/integration/actor/test_actor_lifecycle.py @@ -122,7 +122,8 @@ async def main() -> None: async def test_actor_with_crawler_reboot(make_actor: MakeActorFunction, run_actor: RunActorFunction) -> None: """Test that crawler in actor works as expected after reboot. - Handle two requests. Reboot in between the two requests.""" + Handle two requests. Reboot in between the two requests. The second run should include statistics of the fist run. + """ async def main() -> None: from crawlee._types import BasicCrawlingContext, ConcurrencySettings @@ -152,7 +153,8 @@ async def default_handler(context: BasicCrawlingContext) -> None: await crawler.run(requests) # Each time one request is finished. - assert crawler.statistics.state.requests_finished == 1 + expected_requests_finished = 1 if first_run else 2 + assert crawler.statistics.state.requests_finished == expected_requests_finished actor = await make_actor(label='migration', main_func=main) run_result = await run_actor(actor) diff --git a/uv.lock b/uv.lock index 89a855cc..b52f6358 100644 --- a/uv.lock +++ b/uv.lock @@ -76,7 +76,7 @@ requires-dist = [ { name = "apify-client", specifier = ">=2.2.0,<3.0.0" }, { name = "apify-shared", specifier = ">=2.0.0,<3.0.0" }, { name = "cachetools", specifier = ">=5.5.0" }, - { name = "crawlee", specifier = ">=1.0.2,<2.0.0" }, + { name = "crawlee", specifier = ">=1.0.4,<2.0.0" }, { name = "cryptography", specifier = ">=42.0.0" }, { name = "impit", specifier = ">=0.6.1" }, { name = "lazy-object-proxy", specifier = ">=1.11.0" }, @@ -541,7 +541,7 @@ toml = [ [[package]] name = "crawlee" -version = "1.0.3" +version = "1.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cachetools" }, @@ -557,9 +557,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "yarl" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/f2/db/49a90351f1686cf63b7e88a6899e8819b523a103e2391c89d3b9d1ed5cbb/crawlee-1.0.3.tar.gz", hash = "sha256:b2a35c4b28eda994ff34c94e23ccc9337a164a61d9f5c8ed9387b5a7d7ba8cd3", size = 24915508, upload-time = "2025-10-17T08:55:42.201Z" } +sdist = { url = "https://files.pythonhosted.org/packages/42/9b/a67e2f2faa3a7c9a377321b93babc4d643cb9a8158f20d995536bd1dba82/crawlee-1.0.4.tar.gz", hash = "sha256:e781d9e09048ca7469900a4f1b934c26825698bd4b6f645b9959328a9bc43576", size = 24917824, upload-time = "2025-10-24T09:50:55.642Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/b9/f12caa3ec7c119bdf10f10213c53edd15518b6485e2266b6b75fcaf917c5/crawlee-1.0.3-py3-none-any.whl", hash = "sha256:e08ed85fbb3d0d39a33d4a5827d517216e699dccd347ef314c98f2e5fd45dff3", size = 305210, upload-time = "2025-10-17T08:55:39.773Z" }, + { url = "https://files.pythonhosted.org/packages/0e/d4/c8e06532116073baf96bcb86440378376c5ab30c21ddf0c92b656507836f/crawlee-1.0.4-py3-none-any.whl", hash = "sha256:3168d237661a793b483a4aee02e094cc40b767e4ffc07413cdc014d29ac089e8", size = 306026, upload-time = "2025-10-24T09:50:52.904Z" }, ] [package.optional-dependencies] From c59a680d1128787ca703a39ec9b2d4a9ba6c3308 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 29 Oct 2025 10:53:40 +0100 Subject: [PATCH 4/7] Fix wrong merge from master --- .../actor/test_actor_request_queue.py | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/tests/integration/actor/test_actor_request_queue.py b/tests/integration/actor/test_actor_request_queue.py index d258482e..8a8ba7e9 100644 --- a/tests/integration/actor/test_actor_request_queue.py +++ b/tests/integration/actor/test_actor_request_queue.py @@ -13,7 +13,6 @@ from apify_client import ApifyClientAsync from .conftest import MakeActorFunction, RunActorFunction - from apify.storages import RequestQueue async def test_same_references_in_default_rq( @@ -60,24 +59,6 @@ async def main() -> 
None: assert run_result.status == 'SUCCEEDED' -async def test_force_cloud( - apify_client_async: ApifyClientAsync, - apify_named_rq: RequestQueue, -) -> None: - request_queue_id = (await apify_named_rq.get_metadata()).id - request_info = await apify_named_rq.add_request(Request.from_url('http://example.com')) - assert request_info.id is not None - request_queue_client = apify_client_async.request_queue(request_queue_id) - - request_queue_details = await request_queue_client.get() - assert request_queue_details is not None - assert request_queue_details.get('name') == apify_named_rq.name - - request_queue_request = await request_queue_client.get_request(request_info.id) - assert request_queue_request is not None - assert request_queue_request['url'] == 'http://example.com' - - async def test_request_queue_deduplication( make_actor: MakeActorFunction, run_actor: RunActorFunction, @@ -91,7 +72,7 @@ async def test_request_queue_deduplication( """ async def main() -> None: - from apify import Actor, Request + from apify import Actor async with Actor: request1 = Request.from_url('http://example.com', method='POST') @@ -138,7 +119,7 @@ async def test_request_queue_deduplication_use_extended_unique_key( """ async def main() -> None: - from apify import Actor, Request + from apify import Actor async with Actor: request1 = Request.from_url('http://example.com', method='POST', use_extended_unique_key=True) @@ -187,7 +168,7 @@ async def test_request_queue_parallel_deduplication( async def main() -> None: import logging - from apify import Actor, Request + from apify import Actor worker_count = 10 max_requests = 100 @@ -315,7 +296,6 @@ async def test_rq_defaults( run_actor: RunActorFunction, ) -> None: async def main() -> None: - from apify import Request from apify.storages import RequestQueue async with Actor: @@ -356,7 +336,6 @@ async def test_rq_aliases( run_actor: RunActorFunction, ) -> None: async def main() -> None: - from apify import Request from apify.storages import 
RequestQueue async with Actor: From 302fe2d22bbc37e51019800962419a5de330311d Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 29 Oct 2025 16:05:54 +0100 Subject: [PATCH 5/7] Update test readme --- tests/integration/README.md | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/integration/README.md b/tests/integration/README.md index 25bf58db..1d63e85e 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -1,14 +1,25 @@ # Integration tests -We have integration tests which build and run Actors using the Python SDK on the Apify Platform. To run these tests, you need to set the `APIFY_TEST_USER_API_TOKEN` environment variable to the API token of the Apify user you want to use for the tests, and then start them with `make integration-tests`. +There are two different groups of integration tests in this repository: +- Apify API integration tests. These test that the Apify SDK is correctly communicating with Apify API through Apify client. +- Actor integration tests. These test that the Apify SDK can be used in Actors deployed to Apify platform. These are very high level tests, and they test communication with the API and correct interaction with the Apify platform. + +To run these tests, you need to set the `APIFY_TEST_USER_API_TOKEN` environment variable to the API token of the Apify user you want to use for the tests, and then start them with `make integration-tests`. + +## Apify API integration tests +The tests are making real requests to the Apify API as opposed to the unit tests that are mocking such API calls. On the other hand they are faster than `Actor integration tests` as they do not require building and deploying the actor. These test can be also fully debugged locally. Preferably try to write integration tests on this level if possible. + + +## Actor integration tests +We have integration tests which build and run Actors using the Python SDK on the Apify Platform. 
These integration tests are slower than `Apify API integration tests` as they need to build and deploy Actors on the platform. Preferably try to write `Apify API integration tests` first, and only write `Actor integration tests` when you need to test something that can only be tested on the platform. If you want to run the integration tests on a different environment than the main Apify Platform, you need to set the `APIFY_INTEGRATION_TESTS_API_URL` environment variable to the right URL to the Apify API you want to use. -## How to write tests +### How to write tests There are two fixtures which you can use to write tests: -### `apify_client_async` +#### `apify_client_async` This fixture just gives you an instance of `ApifyClientAsync` configured with the right token and API URL, so you don't have to do that yourself. @@ -17,7 +28,7 @@ async def test_something(apify_client_async: ApifyClientAsync) -> None: assert await apify_client_async.user('me').get() is not None ``` -### `make_actor` +#### `make_actor` This fixture returns a factory function for creating Actors on the Apify Platform. @@ -25,7 +36,7 @@ For the Actor source, the fixture takes the files from `tests/integration/actor_ The created Actor will be uploaded to the platform, built there, and after the test finishes, it will be automatically deleted. If the Actor build fails, it will not be deleted, so that you can check why the build failed. -### Creating test Actor straight from a Python function +#### Creating test Actor straight from a Python function You can create Actors straight from a Python function. This is great because you can have the test Actor source code checked with the linter. @@ -66,7 +77,7 @@ async def test_something( assert run_result.status == 'SUCCEEDED' ``` -### Creating Actor from source files +#### Creating Actor from source files You can also pass the source files directly if you need something more complex (e.g. 
pass some fixed value to the Actor source code or use multiple source files). @@ -127,7 +138,7 @@ async def test_something( assert actor_run.status == 'SUCCEEDED' ``` -### Asserts +#### Asserts Since test Actors are not executed as standard pytest tests, we don't get introspection of assertion expressions. In case of failure, only a bare `AssertionError` is shown, without the left and right values. This means, we must include explicit assertion messages to aid potential debugging. From 95f999549c1184cdc678ec62aacb1b9bcf71ccd0 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 29 Oct 2025 16:10:53 +0100 Subject: [PATCH 6/7] Update after merge --- tests/integration/actor/test_actor_request_queue.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/integration/actor/test_actor_request_queue.py b/tests/integration/actor/test_actor_request_queue.py index 8a8ba7e9..a7a6059f 100644 --- a/tests/integration/actor/test_actor_request_queue.py +++ b/tests/integration/actor/test_actor_request_queue.py @@ -6,7 +6,7 @@ import pytest from .._utils import generate_unique_resource_name -from apify import Actor, Request +from apify import Actor from apify._models import ActorRun if TYPE_CHECKING: @@ -72,6 +72,8 @@ async def test_request_queue_deduplication( """ async def main() -> None: + from crawlee import Request + from apify import Actor async with Actor: @@ -119,6 +121,8 @@ async def test_request_queue_deduplication_use_extended_unique_key( """ async def main() -> None: + from crawlee import Request + from apify import Actor async with Actor: @@ -168,6 +172,8 @@ async def test_request_queue_parallel_deduplication( async def main() -> None: import logging + from crawlee import Request + from apify import Actor worker_count = 10 @@ -296,6 +302,8 @@ async def test_rq_defaults( run_actor: RunActorFunction, ) -> None: async def main() -> None: + from crawlee import Request + from apify.storages import RequestQueue async with Actor: @@ -336,6 
+344,8 @@ async def test_rq_aliases( run_actor: RunActorFunction, ) -> None: async def main() -> None: + from crawlee import Request + from apify.storages import RequestQueue async with Actor: From 9a31f13482452c0e2a1d5c922221b51078e3dbc3 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 29 Oct 2025 16:46:46 +0100 Subject: [PATCH 7/7] Fix capitalization of Actor and platform --- tests/integration/README.md | 8 ++++---- tests/integration/actor/test_actor_lifecycle.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/README.md b/tests/integration/README.md index 1d63e85e..4e07ec51 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -7,13 +7,13 @@ There are two different groups of integration tests in this repository: To run these tests, you need to set the `APIFY_TEST_USER_API_TOKEN` environment variable to the API token of the Apify user you want to use for the tests, and then start them with `make integration-tests`. ## Apify API integration tests -The tests are making real requests to the Apify API as opposed to the unit tests that are mocking such API calls. On the other hand they are faster than `Actor integration tests` as they do not require building and deploying the actor. These test can be also fully debugged locally. Preferably try to write integration tests on this level if possible. +The tests are making real requests to the Apify API as opposed to the unit tests that are mocking such API calls. On the other hand they are faster than `Actor integration tests` as they do not require building and deploying the Actor. These test can be also fully debugged locally. Preferably try to write integration tests on this level if possible. ## Actor integration tests -We have integration tests which build and run Actors using the Python SDK on the Apify Platform. These integration tests are slower than `Apify API integration tests` as they need to build and deploy Actors on the platform. 
Preferably try to write `Apify API integration tests` first, and only write `Actor integration tests` when you need to test something that can only be tested on the platform. +We have integration tests which build and run Actors using the Python SDK on the Apify platform. These integration tests are slower than `Apify API integration tests` as they need to build and deploy Actors on the platform. Preferably try to write `Apify API integration tests` first, and only write `Actor integration tests` when you need to test something that can only be tested on the platform. -If you want to run the integration tests on a different environment than the main Apify Platform, you need to set the `APIFY_INTEGRATION_TESTS_API_URL` environment variable to the right URL to the Apify API you want to use. +If you want to run the integration tests on a different environment than the main Apify platform, you need to set the `APIFY_INTEGRATION_TESTS_API_URL` environment variable to the right URL to the Apify API you want to use. ### How to write tests @@ -30,7 +30,7 @@ async def test_something(apify_client_async: ApifyClientAsync) -> None: #### `make_actor` -This fixture returns a factory function for creating Actors on the Apify Platform. +This fixture returns a factory function for creating Actors on the Apify platform. For the Actor source, the fixture takes the files from `tests/integration/actor_source_base`, builds the Apify SDK wheel from the current codebase, and adds the Actor source you passed to the fixture as an argument. You have to pass exactly one of the `main_func`, `main_py` and `source_files` arguments. 
diff --git a/tests/integration/actor/test_actor_lifecycle.py b/tests/integration/actor/test_actor_lifecycle.py index ddd367a9..e2f98e6d 100644 --- a/tests/integration/actor/test_actor_lifecycle.py +++ b/tests/integration/actor/test_actor_lifecycle.py @@ -122,7 +122,7 @@ async def main() -> None: async def test_actor_with_crawler_reboot(make_actor: MakeActorFunction, run_actor: RunActorFunction) -> None: """Test that crawler in actor works as expected after reboot. - Handle two requests. Reboot in between the two requests. The second run should include statistics of the fist run. + Handle two requests. Reboot in between the two requests. The second run should include statistics of the first run. """ async def main() -> None: