diff --git a/src/crawlee/statistics/_statistics.py b/src/crawlee/statistics/_statistics.py index 3d95a889f7..02c41a7b3e 100644 --- a/src/crawlee/statistics/_statistics.py +++ b/src/crawlee/statistics/_statistics.py @@ -2,6 +2,7 @@ from __future__ import annotations import math +import time from datetime import datetime, timedelta, timezone from logging import Logger, getLogger from typing import TYPE_CHECKING, Generic, Literal @@ -27,22 +28,22 @@ class RequestProcessingRecord: """Tracks information about the processing of a request.""" def __init__(self) -> None: - self._last_run_at: datetime | None = None + self._last_run_at_ns: int | None = None self._runs = 0 self.duration: timedelta | None = None def run(self) -> int: """Mark the job as started.""" - self._last_run_at = datetime.now(timezone.utc) + self._last_run_at_ns = time.perf_counter_ns() self._runs += 1 return self._runs def finish(self) -> timedelta: """Mark the job as finished.""" - if self._last_run_at is None: + if self._last_run_at_ns is None: raise RuntimeError('Invalid state') - self.duration = datetime.now(timezone.utc) - self._last_run_at + self.duration = timedelta(microseconds=math.ceil((time.perf_counter_ns() - self._last_run_at_ns) / 1000)) return self.duration @property diff --git a/tests/unit/_statistics/test_request_processing_record.py b/tests/unit/_statistics/test_request_processing_record.py new file mode 100644 index 0000000000..d5803e67eb --- /dev/null +++ b/tests/unit/_statistics/test_request_processing_record.py @@ -0,0 +1,17 @@ +from datetime import timedelta + +from crawlee.statistics._statistics import RequestProcessingRecord + + +def test_tracking_time_resolution() -> None: + """Test that `RequestProcessingRecord` tracks time with sufficient resolution. + + This is generally not an issue on Linux, but on Windows some packages in older Python versions might be using system + timers with not so granular resolution - some sources estimate 15ms. 
This test will start failing on Windows + if an unsuitable source of time measurement is selected, because two successive measurements may share the same + timing sample.""" + record = RequestProcessingRecord() + record.run() + record.finish() + assert record.duration + assert record.duration > timedelta(seconds=0)