Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions src/crawlee/statistics/_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from __future__ import annotations

import math
import time
from datetime import datetime, timedelta, timezone
from logging import Logger, getLogger
from typing import TYPE_CHECKING, Generic, Literal
Expand All @@ -27,22 +28,22 @@ class RequestProcessingRecord:
"""Tracks information about the processing of a request."""

def __init__(self) -> None:
# NOTE(review): this text is a rendered diff with the +/- markers and indentation stripped, so the
# pre-change and post-change variants of a modified line both appear here, one after the other.
# Pre-change line (apparently removed by this diff): wall-clock start time of the most recent run.
self._last_run_at: datetime | None = None
# Post-change line (apparently added by this diff): start of the most recent run as a
# `time.perf_counter_ns()` reading; stays None until `run()` is called.
self._last_run_at_ns: int | None = None
# Number of times `run()` has been called.
self._runs = 0
# Duration computed by the most recent `finish()` call; None until then.
self.duration: timedelta | None = None

def run(self) -> int:
"""Mark the job as started.

Records the start time of this run and increments the run counter.

Returns:
    The number of times the job has been started so far, including this run.
"""
# Pre-change line (apparently removed by this diff): timezone-aware wall-clock timestamp.
self._last_run_at = datetime.now(timezone.utc)
# Post-change line (apparently added by this diff): performance-counter reading in nanoseconds.
self._last_run_at_ns = time.perf_counter_ns()
self._runs += 1
return self._runs

def finish(self) -> timedelta:
"""Mark the job as finished.

Returns:
    The time elapsed since the most recent `run()` call; the same value is stored in `self.duration`.

Raises:
    RuntimeError: If no start time has been recorded.
"""
# Pre-change guard (apparently removed by this diff).
if self._last_run_at is None:
# Post-change guard (apparently added by this diff).
if self._last_run_at_ns is None:
raise RuntimeError('Invalid state')

# Pre-change computation (apparently removed by this diff): difference of two wall-clock timestamps.
self.duration = datetime.now(timezone.utc) - self._last_run_at
# Post-change computation (apparently added by this diff): elapsed nanoseconds are converted to
# microseconds and rounded up, so any non-zero elapsed time yields at least one microsecond.
self.duration = timedelta(microseconds=math.ceil((time.perf_counter_ns() - self._last_run_at_ns) / 1000))
return self.duration

@property
Expand Down
17 changes: 17 additions & 0 deletions tests/unit/_statistics/test_request_processing_record.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from datetime import timedelta

from crawlee.statistics._statistics import RequestProcessingRecord


def test_tracking_time_resolution() -> None:
    """Test that `RequestProcessingRecord` tracks time with sufficient resolution.

    This is generally not an issue on Linux, but on Windows some packages in older Python versions might use system
    timers with coarse resolution - some sources estimate 15 ms. If an unsuitable time source is selected, two
    successive measurements may come from the same timer sample, the measured duration is then zero, and this test
    starts failing on Windows.
    """
    record = RequestProcessingRecord()
    record.run()
    record.finish()
    # Compare against None explicitly: `timedelta(0)` is falsy, so a bare truthiness check would already fail on a
    # zero duration and hide which condition was actually violated.
    assert record.duration is not None
    assert record.duration > timedelta(seconds=0)
Loading