diff --git a/src/crawlee/_utils/measure_time.py b/src/crawlee/_utils/measure_time.py
deleted file mode 100644
index 6a4a172c4f..0000000000
--- a/src/crawlee/_utils/measure_time.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from __future__ import annotations
-
-import time
-from contextlib import contextmanager
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from collections.abc import Iterator
-
-
-@dataclass
-class TimerResult:
-    wall: float | None = None
-    cpu: float | None = None
-
-
-@contextmanager
-def measure_time() -> Iterator[TimerResult]:
-    """Measure the execution time (wall-clock and CPU) between the start and end of the with-block."""
-    result = TimerResult()
-    before_wall = time.monotonic()
-    before_cpu = time.thread_time()
-
-    try:
-        yield result
-    finally:
-        after_wall = time.monotonic()
-        after_cpu = time.thread_time()
-        result.wall = after_wall - before_wall
-        result.cpu = after_cpu - before_cpu
diff --git a/src/crawlee/_utils/time.py b/src/crawlee/_utils/time.py
new file mode 100644
index 0000000000..dc2521f6b5
--- /dev/null
+++ b/src/crawlee/_utils/time.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+import time
+from contextlib import contextmanager
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from datetime import timedelta
+
+_SECONDS_PER_MINUTE = 60
+_SECONDS_PER_HOUR = 3600
+_MILLISECONDS_PER_SECOND = 1000
+
+
+@dataclass
+class TimerResult:
+    """Timings reported by `measure_time`; both fields stay `None` until the with-block exits."""
+
+    # Elapsed seconds between two `time.monotonic()` readings.
+    wall: float | None = None
+    # Elapsed seconds between two `time.thread_time()` readings.
+    cpu: float | None = None
+
+
+@contextmanager
+def measure_time() -> Iterator[TimerResult]:
+    """Measure the execution time (wall-clock and CPU) between the start and end of the with-block."""
+    result = TimerResult()
+    before_wall = time.monotonic()
+    before_cpu = time.thread_time()
+
+    try:
+        yield result
+    finally:
+        # Record the timings in `finally` so they are available even when the with-block raises.
+        after_wall = time.monotonic()
+        after_cpu = time.thread_time()
+        result.wall = after_wall - before_wall
+        result.cpu = after_cpu - before_cpu
+
+
+def _format_positive_seconds(total_seconds: float) -> str:
+    """Format a strictly positive number of seconds, picking the unit by magnitude."""
+    # For very small durations, show in milliseconds (or microseconds below 1 ms).
+    if total_seconds < 1:
+        milliseconds = total_seconds * _MILLISECONDS_PER_SECOND
+        if milliseconds < 1:
+            microseconds = total_seconds * 1_000_000
+            return f'{microseconds:.1f}μs'
+        # A value that would be rendered as `1000.0ms` falls through and is shown in seconds instead.
+        if round(milliseconds, 1) < _MILLISECONDS_PER_SECOND:
+            return f'{milliseconds:.1f}ms'
+
+    # For durations that still read as less than 60 seconds after rounding, show in seconds.
+    if round(total_seconds, 2) < _SECONDS_PER_MINUTE:
+        return f'{total_seconds:.2f}s'
+
+    # For longer durations, round to tenths of a second (the displayed precision) before splitting into units,
+    # so that a rounding carry moves into the next unit instead of being rendered as `60.0s`.
+    tenths = round(total_seconds * 10)
+    hours, tenths = divmod(tenths, _SECONDS_PER_HOUR * 10)
+    minutes, tenths = divmod(tenths, _SECONDS_PER_MINUTE * 10)
+
+    # Units that are zero are left out, e.g. `12min` or `1h 5.0s`.
+    parts: list[str] = []
+    if hours:
+        parts.append(f'{hours}h')
+    if minutes:
+        parts.append(f'{minutes}min')
+    if tenths:
+        parts.append(f'{tenths / 10:.1f}s')
+
+    return ' '.join(parts)
+
+
+def format_duration(duration: timedelta | None) -> str:
+    """Format a timedelta into a human-readable string with appropriate units.
+
+    Args:
+        duration: The duration to format, or `None`.
+
+    Returns:
+        `'None'` for `None` and `'0s'` for a zero duration. Otherwise the magnitude expressed in `μs`, `ms`, `s`,
+        or a combination of `h`, `min` and `s`, prefixed with `-` when the duration is negative.
+    """
+    if duration is None:
+        return 'None'
+
+    total_seconds = duration.total_seconds()
+
+    if total_seconds == 0:
+        return '0s'
+
+    # Format the magnitude and re-attach the sign, so negative durations get the same units as positive ones.
+    formatted = _format_positive_seconds(abs(total_seconds))
+    return f'-{formatted}' if total_seconds < 0 else formatted
diff --git a/src/crawlee/statistics/_models.py b/src/crawlee/statistics/_models.py
index 7a3f5c3234..19baab66a6 100644
--- a/src/crawlee/statistics/_models.py
+++ b/src/crawlee/statistics/_models.py
@@ -11,6 +11,9 @@
 from crawlee._utils.console import make_table
 from crawlee._utils.docs import docs_group
 from crawlee._utils.models import timedelta_ms
+from crawlee._utils.time import format_duration
+
+_STATISTICS_TABLE_WIDTH = 100
 
 
 @dataclass(frozen=True)
@@ -31,9 +34,14 @@ class FinalStatistics:
 
     def to_table(self) -> str:
         """Print out the Final Statistics data as a table."""
-        str_dict = {k: v.total_seconds() if isinstance(v, timedelta) else v for k, v in asdict(self).items()}
-
-        return make_table([(str(k), str(v)) for k, v in str_dict.items()], width=60)
+        formatted_dict = {}
+        for k, v in asdict(self).items():
+            if isinstance(v, timedelta):
+                formatted_dict[k] = format_duration(v)
+            else:
+                formatted_dict[k] = v
+
+        return make_table([(str(k), str(v)) for k, v in formatted_dict.items()], width=_STATISTICS_TABLE_WIDTH)
 
     def to_dict(self) -> dict[str, float | int | list[int]]:
         return {k: v.total_seconds() if isinstance(v, timedelta) else v for k, v in asdict(self).items()}
diff --git a/tests/unit/_autoscaling/test_autoscaled_pool.py b/tests/unit/_autoscaling/test_autoscaled_pool.py
index b4e82fee76..d8c6b22c10 100644
--- a/tests/unit/_autoscaling/test_autoscaled_pool.py
+++ b/tests/unit/_autoscaling/test_autoscaled_pool.py
@@ -14,7 +14,7 @@
 from crawlee._autoscaling import AutoscaledPool, SystemStatus
 from crawlee._autoscaling._types import LoadRatioInfo, SystemInfo
 from crawlee._types import ConcurrencySettings
-from crawlee._utils.measure_time import measure_time
+from crawlee._utils.time import measure_time
 
 if TYPE_CHECKING:
     from collections.abc import Awaitable
diff --git a/tests/unit/_utils/test_measure_time.py b/tests/unit/_utils/test_measure_time.py
index 4d2d41b6af..05f1ae063c 100644
--- a/tests/unit/_utils/test_measure_time.py
+++ b/tests/unit/_utils/test_measure_time.py
@@ -3,7 +3,7 @@
 import asyncio
 import time
 
-from crawlee._utils.measure_time import measure_time
+from crawlee._utils.time import measure_time
 
 
 def test_measure_time_wall_sync() -> None:
diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py
index aa7eabf29e..a77d92d8fc 100644
--- a/tests/unit/crawlers/_basic/test_basic_crawler.py
+++ b/tests/unit/crawlers/_basic/test_basic_crawler.py
@@ -951,18 +951,18 @@ async def handler(context: BasicCrawlingContext) -> None:
     if statistics_log_format == 'table':
         assert final_statistics.msg.splitlines() == [
             'Final request statistics:',
-            '┌───────────────────────────────┬───────────┐',
-            '│ requests_finished             │ 4         │',
-            '│ requests_failed               │ 33        │',
-            '│ retry_histogram               │ [1, 4, 8] │',
-            '│ request_avg_failed_duration   │ 99.0      │',
-            '│ request_avg_finished_duration │ 0.483     │',
-            '│ requests_finished_per_minute  │ 0.33      │',
-            '│ requests_failed_per_minute    │ 0.1       │',
-            '│ request_total_duration        │ 720.0     │',
-            '│ requests_total                │ 37        │',
-            '│ crawler_runtime               │ 300.0     │',
-            '└───────────────────────────────┴───────────┘',
+            '┌───────────────────────────────┬────────────┐',
+            '│ requests_finished             │ 4          │',
+            '│ requests_failed               │ 33         │',
+            '│ retry_histogram               │ [1, 4, 8]  │',
+            '│ request_avg_failed_duration   │ 1min 39.0s │',
+            '│ request_avg_finished_duration │ 483.0ms    │',
+            '│ requests_finished_per_minute  │ 0.33       │',
+            '│ requests_failed_per_minute    │ 0.1        │',
+            '│ request_total_duration        │ 12min      │',
+            '│ requests_total                │ 37         │',
+            '│ crawler_runtime               │ 5min       │',
+            '└───────────────────────────────┴────────────┘',
         ]
     else:
         assert final_statistics.msg == 'Final request statistics:'