Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 22 additions & 20 deletions src/crawlee/_utils/system.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import annotations

import os
import sys
from contextlib import suppress
from datetime import datetime, timezone
from logging import getLogger
from typing import Annotated, Any
from typing import Annotated

import psutil
from pydantic import BaseModel, ConfigDict, Field, PlainSerializer, PlainValidator
Expand All @@ -13,6 +14,24 @@

logger = getLogger(__name__)

# The used-memory metric is selected once, at import time, from `sys.platform`:
#
# `Proportional Set Size (PSS)` is the amount of own memory and memory shared with other processes, accounted in a
# way that the shared amount is divided evenly between the processes that share it. Available on Linux. Suitable for
# avoiding overestimation by counting the same shared memory used by children processes multiple times.
#
# `Resident Set Size (RSS)` is the non-swapped physical memory a process has used; it includes shared memory. It
# should be available everywhere.
if sys.platform == 'linux':

    def _get_used_memory(process: psutil.Process) -> int:
        """Get the most suitable available used memory metric of a process (PSS on Linux).

        `Proportional Set Size (PSS)` splits memory shared with other processes evenly between the processes
        that share it, so summing it over a process and its children does not count shared memory repeatedly.

        Args:
            process: The process whose memory usage should be measured.

        Returns:
            The PSS value reported by `process.memory_full_info()`, converted to `int`.
        """
        return int(process.memory_full_info().pss)

else:

    def _get_used_memory(process: psutil.Process) -> int:
        """Get the most suitable available used memory metric of a process (RSS outside Linux).

        `Resident Set Size (RSS)` is the non-swapped physical memory a process has used; it includes shared
        memory, so a sum over several processes may count the shared part more than once.

        Args:
            process: The process whose memory usage should be measured.

        Returns:
            The RSS value reported by `process.memory_info()`, converted to `int`.
        """
        return int(process.memory_info().rss)


class CpuInfo(BaseModel):
"""Information about the CPU usage."""
Expand Down Expand Up @@ -88,14 +107,14 @@ def get_memory_info() -> MemoryInfo:
current_process = psutil.Process(os.getpid())

# Retrieve estimated memory usage of the current process.
current_size_bytes = int(_get_used_memory(current_process.memory_full_info()))
current_size_bytes = _get_used_memory(current_process)

# Sum memory usage by all children processes, try to exclude shared memory from the sum if allowed by OS.
for child in current_process.children(recursive=True):
# Ignore any NoSuchProcess exception that might occur if a child process ends before we retrieve
# its memory usage.
with suppress(psutil.NoSuchProcess):
current_size_bytes += _get_used_memory(child.memory_full_info())
current_size_bytes += _get_used_memory(child)

vm = psutil.virtual_memory()

Expand All @@ -104,20 +123,3 @@ def get_memory_info() -> MemoryInfo:
current_size=ByteSize(current_size_bytes),
system_wide_used_size=ByteSize(vm.total - vm.available),
)


def _get_used_memory(memory_full_info: Any) -> int:
    """Pick the best used-memory figure exposed by a memory info object.

    Preference order:

    1. `Proportional Set Size (PSS)` - own memory plus an even share of the memory shared with other processes.
       Available on Linux. Because shared memory is divided between the processes sharing it, summing PSS over
       a process and its children avoids counting the same shared memory multiple times.
    2. `Resident Set Size (RSS)` - the non-swapped physical memory a process has used, shared memory included.
       It should be available everywhere and is used when the object has no `pss` attribute.

    Args:
        memory_full_info: Object exposing `pss` and/or `rss` attributes.

    Returns:
        The selected metric converted to `int`.
    """
    try:
        # Linux
        used = memory_full_info.pss
    except AttributeError:
        # No PSS on this object; fall back to RSS.
        used = memory_full_info.rss
    return int(used)
4 changes: 2 additions & 2 deletions tests/unit/_utils/test_system.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

import os
import sys
from multiprocessing import Barrier, Process, Value, synchronize
from multiprocessing.shared_memory import SharedMemory
from typing import TYPE_CHECKING
Expand All @@ -26,7 +26,7 @@ def test_get_cpu_info_returns_valid_values() -> None:
assert 0 <= cpu_info.used_ratio <= 1


@pytest.mark.skipif(os.name == 'nt', reason='Improved estimation not available on Windows')
@pytest.mark.skipif(sys.platform != 'linux', reason='Improved estimation available only on Linux')
def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None:
"""Test that memory usage estimation is not overestimating memory usage by counting shared memory multiple times.

Expand Down
Loading