diff --git a/docs/examples/code_examples/respect_robots_txt_file.py b/docs/examples/code_examples/respect_robots_txt_file.py
new file mode 100644
index 0000000000..ebd63b1c2e
--- /dev/null
+++ b/docs/examples/code_examples/respect_robots_txt_file.py
@@ -0,0 +1,27 @@
+import asyncio
+
+from crawlee.crawlers import (
+    BeautifulSoupCrawler,
+    BeautifulSoupCrawlingContext,
+)
+
+
+async def main() -> None:
+    # Initialize the crawler with robots.txt compliance enabled
+    crawler = BeautifulSoupCrawler(respect_robots_txt_file=True)
+
+    @crawler.router.default_handler
+    async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
+        context.log.info(f'Processing {context.request.url} ...')
+
+    # Start the crawler with the specified URLs
+    # The crawler will check the robots.txt file before making requests
+    # In this example, 'https://news.ycombinator.com/login' will be skipped
+    # because it's disallowed in the site's robots.txt file
+    await crawler.run(
+        ['https://news.ycombinator.com/', 'https://news.ycombinator.com/login']
+    )
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/examples/respect_robots_txt_file.mdx b/docs/examples/respect_robots_txt_file.mdx
new file mode 100644
index 0000000000..5f6194c919
--- /dev/null
+++ b/docs/examples/respect_robots_txt_file.mdx
@@ -0,0 +1,21 @@
+---
+id: respect-robots-txt-file
+title: Respect robots.txt file
+---
+
+import ApiLink from '@site/src/components/ApiLink';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
+
+import RespectRobotsTxt from '!!raw-loader!roa-loader!./code_examples/respect_robots_txt_file.py';
+
+This example demonstrates how to configure your crawler to respect the rules that websites define for crawlers in their [robots.txt](https://www.robotstxt.org/robotstxt.html) file.
+
+To configure `Crawlee` to follow the `robots.txt` file, set the parameter `respect_robots_txt_file=True` in <ApiLink to="class/BasicCrawlerOptions">`BasicCrawlerOptions`</ApiLink>. With this setting, `Crawlee` will skip any URLs that are disallowed by the website's robots.txt file.
+
+As an example, let's look at the website `https://news.ycombinator.com/` and its corresponding [robots.txt](https://news.ycombinator.com/robots.txt) file. Since the file has a rule `Disallow: /login`, the URL `https://news.ycombinator.com/login` will be automatically skipped.
+
+The code below demonstrates this behavior using the <ApiLink to="class/BeautifulSoupCrawler">`BeautifulSoupCrawler`</ApiLink>:
+
+<RunnableCodeBlock className="language-python" language="python">
+    {RespectRobotsTxt}
+</RunnableCodeBlock>
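
Because the option is defined on `BasicCrawlerOptions`, the same flag works with any crawler class. As a minimal sketch, the equivalent setup with `ParselCrawler` (assuming the `parsel` extra is installed) could look like this:

```python
import asyncio

from crawlee.crawlers import ParselCrawler, ParselCrawlingContext


async def main() -> None:
    # The flag comes from BasicCrawlerOptions, so any crawler accepts it.
    crawler = ParselCrawler(respect_robots_txt_file=True)

    @crawler.router.default_handler
    async def request_handler(context: ParselCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')

    # As in the BeautifulSoup example, the disallowed /login URL is skipped.
    await crawler.run(
        ['https://news.ycombinator.com/', 'https://news.ycombinator.com/login']
    )


if __name__ == '__main__':
    asyncio.run(main())
```
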
diff --git a/pyproject.toml b/pyproject.toml
index 0c6c4596c0..77b1fe2d1b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ dependencies = [
     "eval-type-backport>=0.2.0",
     "httpx[brotli,http2,zstd]>=0.27.0",
     "more-itertools>=10.2.0",
+    "protego>=0.4.0",
     "psutil>=6.0.0",
     "pydantic-settings>=2.2.0,<2.7.0",
     "pydantic>=2.8.0,!=2.10.0,!=2.10.1,!=2.10.2",
@@ -236,7 +237,9 @@ module = [
     "functions_framework",          # Example code shows deploy on Google Cloud.
     "jaro",                         # Untyped and stubs not available
     "loguru",                       # Example code shows integration of loguru and crawlee for JSON logging.
+    "protego",                      # Untyped and stubs not available
     "sklearn.linear_model",         # Untyped and stubs not available
+    "sortedcollections",            # Untyped and stubs not available
     "cookiecutter.*",               # Untyped and stubs not available
     "inquirer.*",                   # Untyped and stubs not available
 ]
diff --git a/src/crawlee/_utils/robots.py b/src/crawlee/_utils/robots.py
new file mode 100644
index 0000000000..930ae09431
--- /dev/null
+++ b/src/crawlee/_utils/robots.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from protego import Protego
+from yarl import URL
+
+from crawlee._utils.web import is_status_code_client_error
+
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+    from crawlee.http_clients import HttpClient
+    from crawlee.proxy_configuration import ProxyInfo
+
+
+class RobotsTxtFile:
+    def __init__(self, url: str, robots: Protego) -> None:
+        self._robots = robots
+        self._original_url = URL(url).origin()
+
+    @classmethod
+    async def from_content(cls, url: str, content: str) -> Self:
+        """Create a `RobotsTxtFile` instance from the given content.
+
+        Args:
+            url: The URL associated with the robots.txt file.
+            content: The raw string content of the robots.txt file to be parsed.
+        """
+        robots = Protego.parse(content)
+        return cls(url, robots)
+
+    @classmethod
+    async def find(cls, url: str, http_client: HttpClient, proxy_info: ProxyInfo | None = None) -> Self:
+        """Determine the location of a robots.txt file for a URL and fetch it.
+
+        Args:
+            url: The URL whose domain will be used to find the corresponding robots.txt file.
+            http_client: The `HttpClient` instance used to perform the network request for fetching the robots.txt file.
+            proxy_info: Optional `ProxyInfo` to be used when fetching the robots.txt file. If None, no proxy is used.
+        """
+        robots_url = URL(url).with_path('/robots.txt')
+        return await cls.load(str(robots_url), http_client, proxy_info)
+
+    @classmethod
+    async def load(cls, url: str, http_client: HttpClient, proxy_info: ProxyInfo | None = None) -> Self:
+        """Load the robots.txt file for a given URL.
+
+        Args:
+            url: The direct URL of the robots.txt file to be loaded.
+            http_client: The `HttpClient` instance used to perform the network request for fetching the robots.txt file.
+            proxy_info: Optional `ProxyInfo` to be used when fetching the robots.txt file. If None, no proxy is used.
+        """
+        response = await http_client.send_request(url, proxy_info=proxy_info)
+        body = b'User-agent: *\nAllow: /' if is_status_code_client_error(response.status_code) else response.read()
+
+        robots = Protego.parse(body.decode('utf-8'))
+
+        return cls(url, robots)
+
+    def is_allowed(self, url: str, user_agent: str = '*') -> bool:
+        """Check if the given URL is allowed for the given user agent.
+
+        Args:
+            url: The URL to check against the robots.txt rules.
+            user_agent: The user-agent string to check permissions for. Defaults to '*' which matches any user-agent.
+        """
+        check_url = URL(url)
+        if check_url.origin() != self._original_url:
+            return True
+        return bool(self._robots.can_fetch(str(check_url), user_agent))
+
+    def get_sitemaps(self) -> list[str]:
+        """Get the list of sitemaps urls from the robots.txt file."""
+        return list(self._robots.sitemaps)
+
+    def get_crawl_delay(self, user_agent: str = '*') -> int | None:
+        """Get the crawl delay for the given user agent.
+
+        Args:
+            user_agent: The user-agent string to check the crawl delay for. Defaults to '*' which matches any
+                user-agent.
+        """
+        crawl_delay = self._robots.crawl_delay(user_agent)
+        return int(crawl_delay) if crawl_delay is not None else None
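
For reference, a minimal sketch of how the new `RobotsTxtFile` helper behaves on its own, based on the API above; the robots.txt content and URLs are illustrative:

```python
import asyncio

from crawlee._utils.robots import RobotsTxtFile


async def main() -> None:
    # Illustrative robots.txt content; in a crawler this is normally fetched
    # via RobotsTxtFile.find(url, http_client).
    content = 'User-agent: *\nDisallow: /private/\nCrawl-delay: 5'
    robots = await RobotsTxtFile.from_content('https://example.com/robots.txt', content)

    assert robots.is_allowed('https://example.com/index.html')
    assert not robots.is_allowed('https://example.com/private/page.html')

    # URLs from a different origin are always reported as allowed.
    assert robots.is_allowed('https://other-domain.com/private/page.html')

    assert robots.get_crawl_delay() == 5
    assert robots.get_sitemaps() == []


asyncio.run(main())
```
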
diff --git a/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py b/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
index 04e16683f6..9abcb4c6f5 100644
--- a/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
+++ b/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
@@ -159,12 +159,19 @@ async def extract_links(
             requests = list[Request]()
             base_user_data = user_data or {}
 
+            robots_txt_file = await self._get_robots_txt_file_for_url(context.request.url)
+
             for link in self._parser.find_links(parsed_content, selector=selector):
                 url = link
                 if not is_url_absolute(url):
                     base_url = context.request.loaded_url or context.request.url
                     url = convert_to_absolute_url(base_url, url)
 
+                if robots_txt_file and not robots_txt_file.is_allowed(url):
+                    # TODO: https://github.com/apify/crawlee-python/issues/1160
+                    # add processing with on_skipped_request hook
+                    continue
+
                 request_options = RequestOptions(url=url, user_data={**base_user_data}, label=label)
 
                 if transform_request_function:
diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index c69db280a6..49b28c043e 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -17,8 +17,10 @@
 from urllib.parse import ParseResult, urlparse
 from weakref import WeakKeyDictionary
 
+from cachetools import LRUCache
 from tldextract import TLDExtract
 from typing_extensions import NotRequired, TypedDict, TypeVar, Unpack, assert_never
+from yarl import URL
 
 from crawlee import EnqueueStrategy, Glob, service_locator
 from crawlee._autoscaling import AutoscaledPool, Snapshotter, SystemStatus
@@ -32,6 +34,7 @@
     SendRequestFunction,
 )
 from crawlee._utils.docs import docs_group
+from crawlee._utils.robots import RobotsTxtFile
 from crawlee._utils.urls import convert_to_absolute_url, is_url_absolute
 from crawlee._utils.wait import wait_for
 from crawlee._utils.web import is_status_code_client_error, is_status_code_server_error
@@ -158,6 +161,10 @@ class _BasicCrawlerOptions(TypedDict):
     """A logger instance, typically provided by a subclass, for consistent logging labels. Intended for use by
     subclasses rather than direct instantiation of `BasicCrawler`."""
 
+    respect_robots_txt_file: NotRequired[bool]
+    """If set to `True`, the crawler will automatically try to fetch the robots.txt file for each domain,
+    and skip those that are not allowed. This also prevents disallowed URLs to be added via `EnqueueLinksFunction`."""
+
 
 class _BasicCrawlerOptionsGeneric(Generic[TCrawlingContext, TStatisticsState], TypedDict):
     """Generic options the `BasicCrawler` constructor."""
@@ -238,6 +245,7 @@ def __init__(
         keep_alive: bool = False,
         configure_logging: bool = True,
         statistics_log_format: Literal['table', 'inline'] = 'table',
+        respect_robots_txt_file: bool = False,
         _context_pipeline: ContextPipeline[TCrawlingContext] | None = None,
         _additional_context_managers: Sequence[AbstractAsyncContextManager] | None = None,
         _logger: logging.Logger | None = None,
@@ -280,6 +288,9 @@ def __init__(
             configure_logging: If True, the crawler will set up logging infrastructure automatically.
             statistics_log_format: If 'table', displays crawler statistics as formatted tables in logs. If 'inline',
                 outputs statistics as plain text log messages.
+            respect_robots_txt_file: If set to `True`, the crawler will automatically try to fetch the robots.txt file
+                for each domain and skip URLs it disallows. This also prevents disallowed URLs from being added
+                via `EnqueueLinksFunction`.
             _context_pipeline: Enables extending the request lifecycle and modifying the crawling context.
                 Intended for use by subclasses rather than direct instantiation of `BasicCrawler`.
             _additional_context_managers: Additional context managers used throughout the crawler lifecycle.
@@ -335,6 +346,7 @@ def __init__(
         self._max_requests_per_crawl = max_requests_per_crawl
         self._max_session_rotations = max_session_rotations
         self._max_crawl_depth = max_crawl_depth
+        self._respect_robots_txt_file = respect_robots_txt_file
 
         # Timeouts
         self._request_handler_timeout = request_handler_timeout
@@ -371,6 +383,8 @@ def __init__(
         self._additional_context_managers = _additional_context_managers or []
 
         # Internal, not explicitly configurable components
+        self._robots_txt_file_cache: LRUCache[str, RobotsTxtFile] = LRUCache(maxsize=1000)
+        self._robots_txt_lock = asyncio.Lock()
         self._tld_extractor = TLDExtract(cache_dir=tempfile.TemporaryDirectory().name)
         self._snapshotter = Snapshotter.from_config(config)
         self._autoscaled_pool = AutoscaledPool(
@@ -645,10 +659,25 @@ async def add_requests(
             wait_for_all_requests_to_be_added: If True, wait for all requests to be added before returning.
             wait_for_all_requests_to_be_added_timeout: Timeout for waiting for all requests to be added.
         """
+        allowed_requests = []
+        skipped = []
+
+        for request in requests:
+            check_url = request.url if isinstance(request, Request) else request
+            if await self._is_allowed_based_on_robots_txt_file(check_url):
+                allowed_requests.append(request)
+            else:
+                skipped.append(request)
+
+        if skipped:
+            # TODO: https://github.com/apify/crawlee-python/issues/1160
+            # add processing with on_skipped_request hook
+            self._logger.warning('Some requests were skipped because they were disallowed by the robots.txt file')
+
         request_manager = await self.get_request_manager()
 
         await request_manager.add_requests_batched(
-            requests=requests,
+            requests=allowed_requests,
             batch_size=batch_size,
             wait_time_between_batches=wait_time_between_batches,
             wait_for_all_requests_to_be_added=wait_for_all_requests_to_be_added,
@@ -1080,6 +1109,22 @@ async def __run_task_function(self) -> None:
         if request is None:
             return
 
+        if not (await self._is_allowed_based_on_robots_txt_file(request.url)):
+            self._logger.warning(
+                f'Skipping request {request.url} ({request.id}) because it is disallowed based on robots.txt'
+            )
+            await wait_for(
+                lambda: request_manager.mark_request_as_handled(request),
+                timeout=self._internal_timeout,
+                timeout_message='Marking request as handled timed out after '
+                f'{self._internal_timeout.total_seconds()} seconds',
+                logger=self._logger,
+                max_retries=3,
+            )
+            # TODO: https://github.com/apify/crawlee-python/issues/1160
+            # add processing with on_skipped_request hook
+            return
+
         if request.session_id:
             session = await self._get_session_by_id(request.session_id)
         else:
@@ -1263,3 +1308,38 @@ def _check_request_collision(self, request: Request, session: Session | None) ->
             raise RequestCollisionError(
                 f'The Session (id: {request.session_id}) bound to the Request is no longer available in SessionPool'
             )
+
+    async def _is_allowed_based_on_robots_txt_file(self, url: str) -> bool:
+        """Check if the URL is allowed based on the robots.txt file.
+
+        Args:
+            url: The URL to verify against robots.txt rules. Returns True if crawling this URL is permitted.
+        """
+        if not self._respect_robots_txt_file:
+            return True
+        robots_txt_file = await self._get_robots_txt_file_for_url(url)
+        return not robots_txt_file or robots_txt_file.is_allowed(url)
+
+    async def _get_robots_txt_file_for_url(self, url: str) -> RobotsTxtFile | None:
+        """Get the RobotsTxtFile for a given URL.
+
+        Args:
+            url: The URL whose domain will be used to locate and fetch the corresponding robots.txt file.
+        """
+        if not self._respect_robots_txt_file:
+            return None
+        origin_url = str(URL(url).origin())
+        robots_txt_file = self._robots_txt_file_cache.get(origin_url)
+        if robots_txt_file:
+            return robots_txt_file
+
+        async with self._robots_txt_lock:
+            # Check again if the robots.txt file is already cached after acquiring the lock
+            robots_txt_file = self._robots_txt_file_cache.get(origin_url)
+            if robots_txt_file:
+                return robots_txt_file
+
+            # If not cached, fetch the robots.txt file
+            robots_txt_file = await RobotsTxtFile.find(url, self._http_client)
+            self._robots_txt_file_cache[origin_url] = robots_txt_file
+            return robots_txt_file
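
The `_get_robots_txt_file_for_url` helper pairs an `LRUCache` with an `asyncio.Lock` and a double-checked lookup, so concurrent requests to the same origin trigger only one robots.txt fetch (this is what the new `test_lock_with_get_robots_txt_file_for_url` test verifies). A standalone sketch of that caching pattern, using illustrative names that are not part of the crawler's API:

```python
import asyncio

from cachetools import LRUCache


async def main() -> None:
    # Illustrative stand-ins for the crawler's internal cache and lock attributes.
    cache: LRUCache[str, str] = LRUCache(maxsize=1000)
    lock = asyncio.Lock()
    fetch_count = 0

    async def fetch_robots(origin: str) -> str:
        # Simulated network fetch; the real crawler awaits RobotsTxtFile.find() here.
        nonlocal fetch_count
        fetch_count += 1
        await asyncio.sleep(0.1)
        return f'robots.txt for {origin}'

    async def get_robots_for_origin(origin: str) -> str:
        # Fast path: return the cached value without taking the lock.
        if (cached := cache.get(origin)) is not None:
            return cached

        async with lock:
            # Double-check after acquiring the lock: another task may have
            # populated the cache while this one was waiting.
            if (cached := cache.get(origin)) is not None:
                return cached

            result = await fetch_robots(origin)
            cache[origin] = result
            return result

    # Ten concurrent lookups for the same origin result in exactly one fetch.
    await asyncio.gather(*(get_robots_for_origin('https://example.com') for _ in range(10)))
    assert fetch_count == 1


asyncio.run(main())
```
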
diff --git a/src/crawlee/crawlers/_playwright/_playwright_crawler.py b/src/crawlee/crawlers/_playwright/_playwright_crawler.py
index f923229087..8981498906 100644
--- a/src/crawlee/crawlers/_playwright/_playwright_crawler.py
+++ b/src/crawlee/crawlers/_playwright/_playwright_crawler.py
@@ -290,6 +290,8 @@ async def extract_links(
 
             elements = await context.page.query_selector_all(selector)
 
+            robots_txt_file = await self._get_robots_txt_file_for_url(context.request.url)
+
             for element in elements:
                 url = await element.get_attribute('href')
 
@@ -300,6 +302,11 @@ async def extract_links(
                         base_url = context.request.loaded_url or context.request.url
                         url = convert_to_absolute_url(base_url, url)
 
+                    if robots_txt_file and not robots_txt_file.is_allowed(url):
+                        # TODO: https://github.com/apify/crawlee-python/issues/1160
+                        # add processing with on_skipped_request hook
+                        continue
+
                     request_option = RequestOptions({'url': url, 'user_data': {**base_user_data}, 'label': label})
 
                     if transform_request_function:
diff --git a/src/crawlee/storage_clients/_memory/_request_queue_client.py b/src/crawlee/storage_clients/_memory/_request_queue_client.py
index 0031e54abd..477d53df07 100644
--- a/src/crawlee/storage_clients/_memory/_request_queue_client.py
+++ b/src/crawlee/storage_clients/_memory/_request_queue_client.py
@@ -8,7 +8,7 @@
 from logging import getLogger
 from typing import TYPE_CHECKING
 
-from sortedcollections import ValueSortedDict  # type: ignore[import-untyped]
+from sortedcollections import ValueSortedDict
 from typing_extensions import override
 
 from crawlee._types import StorageTypes
diff --git a/tests/unit/_utils/test_robots.py b/tests/unit/_utils/test_robots.py
new file mode 100644
index 0000000000..61dc60daa5
--- /dev/null
+++ b/tests/unit/_utils/test_robots.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from crawlee._utils.robots import RobotsTxtFile
+
+if TYPE_CHECKING:
+    from yarl import URL
+
+    from crawlee.http_clients._base import HttpClient
+
+
+async def test_generation_robots_txt_url(server_url: URL, http_client: HttpClient) -> None:
+    robots_file = await RobotsTxtFile.find(str(server_url), http_client)
+    assert len(robots_file.get_sitemaps()) > 0
+
+
+async def test_allow_disallow_robots_txt(server_url: URL, http_client: HttpClient) -> None:
+    robots = await RobotsTxtFile.find(str(server_url), http_client)
+    assert robots.is_allowed('https://crawlee.dev')
+    assert robots.is_allowed(str(server_url / 'something/page.html'))
+    assert robots.is_allowed(str(server_url / 'deny_googlebot/page.html'))
+    assert not robots.is_allowed(str(server_url / 'deny_all/page.html'))
+
+
+async def test_extract_sitemaps_urls(server_url: URL, http_client: HttpClient) -> None:
+    robots = await RobotsTxtFile.find(str(server_url), http_client)
+    assert len(robots.get_sitemaps()) == 2
+    assert set(robots.get_sitemaps()) == {'http://not-exists.com/sitemap_1.xml', 'http://not-exists.com/sitemap_2.xml'}
+
+
+async def test_parse_from_content() -> None:
+    content = """User-agent: *
+        Disallow: *deny_all/
+        crawl-delay: 10
+        User-agent: Googlebot
+        Disallow: *deny_googlebot/"""
+    robots = await RobotsTxtFile.from_content('http://not-exists.com/robots.txt', content)
+    assert robots.is_allowed('http://not-exists.com/something/page.html')
+    assert robots.is_allowed('http://not-exists.com/deny_googlebot/page.html')
+    assert not robots.is_allowed('http://not-exists.com/deny_googlebot/page.html', 'Googlebot')
+    assert not robots.is_allowed('http://not-exists.com/deny_all/page.html')
+
+
+async def test_bind_robots_txt_url() -> None:
+    content = 'User-agent: *\nDisallow: /'
+    robots = await RobotsTxtFile.from_content('http://check.com/robots.txt', content)
+    assert not robots.is_allowed('http://check.com/test.html')
+    assert robots.is_allowed('http://othercheck.com/robots.txt')
diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py
index ab7a219ef7..40f57de5ea 100644
--- a/tests/unit/crawlers/_basic/test_basic_crawler.py
+++ b/tests/unit/crawlers/_basic/test_basic_crawler.py
@@ -12,13 +12,14 @@
 from datetime import timedelta
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal, cast
-from unittest.mock import AsyncMock, Mock, call
+from unittest.mock import AsyncMock, Mock, call, patch
 
 import pytest
 
 from crawlee import ConcurrencySettings, Glob, service_locator
 from crawlee._request import Request
 from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs, HttpHeaders
+from crawlee._utils.robots import RobotsTxtFile
 from crawlee.configuration import Configuration
 from crawlee.crawlers import BasicCrawler
 from crawlee.errors import RequestCollisionError, SessionError, UserDefinedErrorHandlerError
@@ -1310,3 +1311,15 @@ async def failed_request_handler(context: BasicCrawlingContext, error: Exception
     await crawler.run(requests)
 
     assert set(requests) == handler_requests
+
+
+async def test_lock_with_get_robots_txt_file_for_url(server_url: URL) -> None:
+    crawler = BasicCrawler(respect_robots_txt_file=True)
+
+    with patch('crawlee.crawlers._basic._basic_crawler.RobotsTxtFile.find', wraps=RobotsTxtFile.find) as spy:
+        await asyncio.gather(
+            *[asyncio.create_task(crawler._get_robots_txt_file_for_url(str(server_url))) for _ in range(10)]
+        )
+
+        # Check that the lock was acquired only once
+        assert spy.call_count == 1
diff --git a/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py b/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
index 167391dc6f..b73ea4aeaa 100644
--- a/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
+++ b/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
@@ -142,3 +142,21 @@ async def test_handle_blocked_request(server_url: URL, http_client: HttpClient)
 
 def test_default_logger() -> None:
     assert BeautifulSoupCrawler().log.name == 'BeautifulSoupCrawler'
+
+
+async def test_respect_robots_txt(server_url: URL, http_client: HttpClient) -> None:
+    crawler = BeautifulSoupCrawler(http_client=http_client, respect_robots_txt_file=True)
+    visit = mock.Mock()
+
+    @crawler.router.default_handler
+    async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
+        visit(context.request.url)
+        await context.enqueue_links()
+
+    await crawler.run([str(server_url / 'start_enqueue')])
+    visited = {call[0][0] for call in visit.call_args_list}
+
+    assert visited == {
+        str(server_url / 'start_enqueue'),
+        str(server_url / 'sub_index'),
+    }
diff --git a/tests/unit/crawlers/_parsel/test_parsel_crawler.py b/tests/unit/crawlers/_parsel/test_parsel_crawler.py
index 477c091050..586962eac7 100644
--- a/tests/unit/crawlers/_parsel/test_parsel_crawler.py
+++ b/tests/unit/crawlers/_parsel/test_parsel_crawler.py
@@ -239,3 +239,21 @@ async def request_handler(context: ParselCrawlingContext) -> None:
 
 def test_default_logger() -> None:
     assert ParselCrawler().log.name == 'ParselCrawler'
+
+
+async def test_respect_robots_txt(server_url: URL, http_client: HttpClient) -> None:
+    crawler = ParselCrawler(http_client=http_client, respect_robots_txt_file=True)
+    visit = mock.Mock()
+
+    @crawler.router.default_handler
+    async def request_handler(context: ParselCrawlingContext) -> None:
+        visit(context.request.url)
+        await context.enqueue_links()
+
+    await crawler.run([str(server_url / 'start_enqueue')])
+    visited = {call[0][0] for call in visit.call_args_list}
+
+    assert visited == {
+        str(server_url / 'start_enqueue'),
+        str(server_url / 'sub_index'),
+    }
diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
index aaf8fcaad2..3dbca017a6 100644
--- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py
+++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
@@ -598,3 +598,21 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
     assert crawler.statistics.error_tracker.total == 3 * max_retries
     assert crawler.statistics.error_tracker.unique_error_count == 2
     assert len(kvs_content) == 4
+
+
+async def test_respect_robots_txt(server_url: URL) -> None:
+    crawler = PlaywrightCrawler(respect_robots_txt_file=True)
+    visit = mock.Mock()
+
+    @crawler.router.default_handler
+    async def request_handler(context: PlaywrightCrawlingContext) -> None:
+        visit(context.request.url)
+        await context.enqueue_links()
+
+    await crawler.run([str(server_url / 'start_enqueue')])
+    visited = {call[0][0] for call in visit.call_args_list}
+
+    assert visited == {
+        str(server_url / 'start_enqueue'),
+        str(server_url / 'sub_index'),
+    }
diff --git a/tests/unit/server.py b/tests/unit/server.py
index 29e789d013..21ba01cec8 100644
--- a/tests/unit/server.py
+++ b/tests/unit/server.py
@@ -11,7 +11,14 @@
 from uvicorn.server import Server
 from yarl import URL
 
-from tests.unit.server_endpoints import GENERIC_RESPONSE, HELLO_WORLD, INCAPSULA, SECONDARY_INDEX, START_ENQUEUE
+from tests.unit.server_endpoints import (
+    GENERIC_RESPONSE,
+    HELLO_WORLD,
+    INCAPSULA,
+    ROBOTS_TXT,
+    SECONDARY_INDEX,
+    START_ENQUEUE,
+)
 
 if TYPE_CHECKING:
     from socket import socket
@@ -120,6 +127,8 @@ async def app(scope: dict[str, Any], receive: Receive, send: Send) -> None:
         await hello_world_json(send)
     elif path.startswith('/xml'):
         await hello_world_xml(send)
+    elif path.startswith('/robots.txt'):
+        await robots_txt(send)
     else:
         await hello_world(send)
 
@@ -366,6 +375,11 @@ async def dynamic_content(scope: dict[str, Any], send: Send) -> None:
     await send_html_response(send, html_content=content.encode())
 
 
+async def robots_txt(send: Send) -> None:
+    """Handle requests for the robots.txt file."""
+    await send_html_response(send, ROBOTS_TXT)
+
+
 class TestServer(Server):
     """A test HTTP server implementation based on Uvicorn Server."""
 
diff --git a/tests/unit/server_endpoints.py b/tests/unit/server_endpoints.py
index 00456d3dcd..a9f48e6e47 100644
--- a/tests/unit/server_endpoints.py
+++ b/tests/unit/server_endpoints.py
@@ -41,3 +41,20 @@
 <body>
     Insightful content
 </body></html>"""
+
+
+ROBOTS_TXT = b"""\
+User-agent: *
+Disallow: *deny_all/
+Disallow: /page_
+crawl-delay: 10
+
+User-agent: Googlebot
+Disallow: *deny_googlebot/
+crawl-delay: 1
+
+user-agent: Mozilla
+crawl-delay: 2
+
+sitemap: http://not-exists.com/sitemap_1.xml
+sitemap: http://not-exists.com/sitemap_2.xml"""
diff --git a/uv.lock b/uv.lock
index 392d5b63fa..77035eff2c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -600,7 +600,7 @@ toml = [
 
 [[package]]
 name = "crawlee"
-version = "0.6.7"
+version = "0.6.8"
 source = { editable = "." }
 dependencies = [
     { name = "apify-fingerprint-datapoints" },
@@ -610,6 +610,7 @@ dependencies = [
     { name = "eval-type-backport" },
     { name = "httpx", extra = ["brotli", "http2", "zstd"] },
     { name = "more-itertools" },
+    { name = "protego" },
     { name = "psutil" },
     { name = "pydantic" },
     { name = "pydantic-settings" },
@@ -711,6 +712,7 @@ requires-dist = [
     { name = "playwright", marker = "extra == 'adaptive-crawler'", specifier = ">=1.27.0" },
     { name = "playwright", marker = "extra == 'all'", specifier = ">=1.27.0" },
     { name = "playwright", marker = "extra == 'playwright'", specifier = ">=1.27.0" },
+    { name = "protego", specifier = ">=0.4.0" },
     { name = "psutil", specifier = ">=6.0.0" },
     { name = "pydantic", specifier = ">=2.8.0,!=2.10.0,!=2.10.1,!=2.10.2" },
     { name = "pydantic-settings", specifier = ">=2.2.0,<2.7.0" },
@@ -1950,6 +1952,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b8/d3/c3cb8f1d6ae3b37f83e1de806713a9b3642c5895f0215a62e1a4bd6e5e34/propcache-0.3.1-py3-none-any.whl", hash = "sha256:9a8ecf38de50a7f518c21568c80f985e776397b902f1ce0b01f799aba1608b40", size = 12376, upload_time = "2025-03-26T03:06:10.5Z" },
 ]
 
+[[package]]
+name = "protego"
+version = "0.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4e/6b/84e878d0567dfc11538bad6ce2595cee7ae0c47cf6bf7293683c9ec78ef8/protego-0.4.0.tar.gz", hash = "sha256:93a5e662b61399a0e1f208a324f2c6ea95b23ee39e6cbf2c96246da4a656c2f6", size = 3246425, upload_time = "2025-01-17T15:48:21.644Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d9/fd/8d84d75832b0983cecf3aff7ae48362fe96fc8ab6ebca9dcf3cefd87e79c/Protego-0.4.0-py2.py3-none-any.whl", hash = "sha256:37640bc0ebe37572d624453a21381d05e9d86e44f89ff1e81794d185a0491666", size = 8553, upload_time = "2025-01-17T15:48:18.332Z" },
+]
+
 [[package]]
 name = "proxy-py"
 version = "2.4.10"