Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 0 additions & 31 deletions src/crawlee/_utils/measure_time.py

This file was deleted.

80 changes: 80 additions & 0 deletions src/crawlee/_utils/time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from __future__ import annotations

import time
from contextlib import contextmanager
from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from collections.abc import Iterator
from datetime import timedelta

# Conversion factors used by `format_duration` to choose the display unit and to split a duration
# into hours, minutes and seconds.
_SECONDS_PER_MINUTE = 60
_SECONDS_PER_HOUR = 3600


@dataclass
class TimerResult:
    """Container for the timings produced by `measure_time`.

    Both fields start as `None` and are filled in once the measured with-block has exited.
    """

    # Elapsed wall-clock time in seconds (difference of two `time.monotonic()` readings).
    wall: float | None = None
    # CPU time in seconds consumed by the current thread (difference of two `time.thread_time()` readings).
    cpu: float | None = None


@contextmanager
def measure_time() -> Iterator[TimerResult]:
    """Time the body of a with-block, both in wall-clock and in CPU terms.

    The yielded `TimerResult` is empty while the block runs; its `wall` and `cpu` attributes are
    populated when the block exits, whether it finishes normally or raises.
    """
    timing = TimerResult()
    wall_start, cpu_start = time.monotonic(), time.thread_time()

    try:
        yield timing
    finally:
        # Read both clocks first, then store the deltas, so bookkeeping is not part of the measurement.
        wall_end, cpu_end = time.monotonic(), time.thread_time()
        timing.wall = wall_end - wall_start
        timing.cpu = cpu_end - cpu_start


def format_duration(duration: timedelta | None) -> str:
    """Format a timedelta into a human-readable string with appropriate units.

    The unit is chosen from the magnitude of the duration:

    - below 1 ms: microseconds, e.g. ``'250.0μs'``
    - below 1 s: milliseconds, e.g. ``'483.0ms'``
    - below 1 min: seconds, e.g. ``'12.50s'``
    - below 1 h: minutes and seconds, e.g. ``'1min 39.0s'`` or ``'12min'``
    - otherwise: hours, followed by minutes and seconds when they are non-zero, e.g. ``'2h 5min'``

    Negative durations are formatted by their magnitude and prefixed with ``'-'``.

    Args:
        duration: The duration to format, or `None`.

    Returns:
        The formatted duration. `None` yields the literal string ``'None'`` and a zero duration yields ``'0s'``.
    """
    if duration is None:
        return 'None'

    total_seconds = duration.total_seconds()

    if total_seconds == 0:
        return '0s'

    # Every negative value is "< 1", so without this branch it would always be reported in microseconds
    # (e.g. -5 s as '-5000000.0μs'). Format the magnitude and put the sign in front instead.
    if total_seconds < 0:
        return f'-{_format_positive_seconds(-total_seconds)}'

    return _format_positive_seconds(total_seconds)


def _format_positive_seconds(total_seconds: float) -> str:
    """Format a strictly positive number of seconds using the unit rules of `format_duration`."""
    # For very small durations, show in milliseconds (or microseconds below one millisecond)
    if total_seconds < 1:
        milliseconds = total_seconds * 1000
        if milliseconds < 1:
            microseconds = total_seconds * 1_000_000
            return f'{microseconds:.1f}μs'
        return f'{milliseconds:.1f}ms'

    # For durations less than 60 seconds, show in seconds
    if total_seconds < _SECONDS_PER_MINUTE:
        return f'{total_seconds:.2f}s'

    # For durations less than 1 hour, show in minutes and seconds
    if total_seconds < _SECONDS_PER_HOUR:
        minutes = int(total_seconds // _SECONDS_PER_MINUTE)
        seconds = total_seconds % _SECONDS_PER_MINUTE
        if seconds == 0:
            return f'{minutes}min'
        return f'{minutes}min {seconds:.1f}s'

    # For longer durations, show in hours, minutes, and seconds
    hours = int(total_seconds // _SECONDS_PER_HOUR)
    remaining_seconds = total_seconds % _SECONDS_PER_HOUR
    minutes = int(remaining_seconds // _SECONDS_PER_MINUTE)
    seconds = remaining_seconds % _SECONDS_PER_MINUTE

    # Zero-valued minute and second components are omitted to keep the output short.
    result = f'{hours}h'
    if minutes > 0:
        result += f' {minutes}min'
    if seconds > 0:
        result += f' {seconds:.1f}s'

    return result
14 changes: 11 additions & 3 deletions src/crawlee/statistics/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from crawlee._utils.console import make_table
from crawlee._utils.docs import docs_group
from crawlee._utils.models import timedelta_ms
from crawlee._utils.time import format_duration

# Value passed as `width` to `make_table` when the final statistics are rendered as a table.
_STATISTICS_TABLE_WIDTH = 100


@dataclass(frozen=True)
Expand All @@ -31,9 +34,14 @@ class FinalStatistics:

def to_table(self) -> str:
    """Render the final statistics as a table and return it as a string.

    `timedelta` values are converted with `format_duration`; every other value is converted with `str`.
    Each field of the dataclass becomes one `(name, value)` row handed to `make_table`.
    """
    rows = [
        (str(key), format_duration(value) if isinstance(value, timedelta) else str(value))
        for key, value in asdict(self).items()
    ]

    return make_table(rows, width=_STATISTICS_TABLE_WIDTH)

def to_dict(self) -> dict[str, float | int | list[int]]:
    """Return the statistics as a plain dictionary, with `timedelta` values converted to seconds."""
    plain: dict[str, float | int | list[int]] = {}
    for name, value in asdict(self).items():
        if isinstance(value, timedelta):
            plain[name] = value.total_seconds()
        else:
            plain[name] = value
    return plain
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/_autoscaling/test_autoscaled_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from crawlee._autoscaling import AutoscaledPool, SystemStatus
from crawlee._autoscaling._types import LoadRatioInfo, SystemInfo
from crawlee._types import ConcurrencySettings
from crawlee._utils.measure_time import measure_time
from crawlee._utils.time import measure_time

if TYPE_CHECKING:
from collections.abc import Awaitable
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/_utils/test_measure_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import asyncio
import time

from crawlee._utils.measure_time import measure_time
from crawlee._utils.time import measure_time


def test_measure_time_wall_sync() -> None:
Expand Down
24 changes: 12 additions & 12 deletions tests/unit/crawlers/_basic/test_basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,18 +951,18 @@ async def handler(context: BasicCrawlingContext) -> None:
if statistics_log_format == 'table':
assert final_statistics.msg.splitlines() == [
'Final request statistics:',
'┌───────────────────────────────┬───────────┐',
'│ requests_finished │ 4 │',
'│ requests_failed │ 33 │',
'│ retry_histogram │ [1, 4, 8] │',
'│ request_avg_failed_duration │ 99.0 │',
'│ request_avg_finished_duration │ 0.483 │',
'│ requests_finished_per_minute │ 0.33 │',
'│ requests_failed_per_minute │ 0.1 │',
'│ request_total_duration │ 720.0 │',
'│ requests_total │ 37 │',
'│ crawler_runtime │ 300.0 │',
'└───────────────────────────────┴───────────┘',
'┌───────────────────────────────┬───────────┐',
'│ requests_finished │ 4 │',
'│ requests_failed │ 33 │',
'│ retry_histogram │ [1, 4, 8] │',
'│ request_avg_failed_duration │ 1min 39.0s │',
'│ request_avg_finished_duration │ 483.0ms │',
'│ requests_finished_per_minute │ 0.33 │',
'│ requests_failed_per_minute │ 0.1 │',
'│ request_total_duration │ 12min │',
'│ requests_total │ 37 │',
'│ crawler_runtime │ 5min │',
'└───────────────────────────────┴───────────┘',
]
else:
assert final_statistics.msg == 'Final request statistics:'
Expand Down
Loading