Skip to content

Commit

Permalink
perf: Cache module dirs
Browse files Browse the repository at this point in the history
Calling `dir` on modules is expensive - in a benchmark, it took up 2% of the
time when calling `freeze_time`, while `_get_cached_module_attributes` accounted
for 4% of the time.

This commit caches the result of `dir` per module id, which drastically speeds
it up. Running with `pytest-benchmark`:

```python
import time
from datetime import datetime, timezone
from unittest.mock import patch

from freezegun import freeze_time

def setup_with_freezegun() -> None:
    """Benchmark body: call ``time.time()`` once inside a ``freeze_time`` block.

    The frozen instant is 2025-01-01 with a UTC tzinfo.
    """
    current = datetime(2025, 1, 1, tzinfo=timezone.utc)
    with freeze_time(current):
        time.time()

def setup_with_patch() -> None:
    """Baseline benchmark body: the same ``time.time()`` call, with ``time.time``
    replaced through ``unittest.mock.patch`` instead of freezegun.

    The patched function returns the POSIX timestamp of 2025-01-01 UTC.
    """
    current = datetime(2025, 1, 1, tzinfo=timezone.utc)
    current_time = current.timestamp()
    with patch("time.time", return_value=current_time):
        time.time()

def test_benchmark_freezegun(benchmark) -> None:
    # `benchmark` is a callable fixture (the commit message says the run used
    # pytest-benchmark); it is handed the freezegun-based body to time.
    benchmark(setup_with_freezegun)

def test_benchmark_patch(benchmark) -> None:
    # `benchmark` is a callable fixture (the commit message says the run used
    # pytest-benchmark); it is handed the mock.patch-based baseline to time.
    benchmark(setup_with_patch)
```

We get the following:
```
----------------------------------------------------------------------------------------------------- benchmark: 4 tests -----------------------------------------------------------------------------------------------------
Name (time in us)                                  Min                   Max                  Mean              StdDev                Median                IQR            Outliers          OPS            Rounds  Iterations
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_benchmark_patch (0001_5f171db)            62.3360 (1.0)      8,936.2020 (14.71)       84.5774 (1.0)      175.7804 (7.06)        70.2175 (1.0)      13.8290 (1.0)         5;839  11,823.4918 (1.0)        4730           1
test_benchmark_patch (0002_5f171db)            62.3370 (1.00)     9,183.8990 (15.11)       86.2260 (1.02)     192.4761 (7.73)        72.3760 (1.03)     14.2655 (1.03)        3;676  11,597.4350 (0.98)       3949           1
test_benchmark_freezegun (0002_5f171db)       376.8070 (6.04)       607.6380 (1.0)        405.6626 (4.80)      24.8866 (1.0)        400.5450 (5.70)     19.8857 (1.44)         10;4   2,465.1026 (0.21)        121           1
test_benchmark_freezegun (0001_5f171db)     1,983.5520 (31.82)    2,392.4130 (3.94)     2,103.8566 (24.87)     74.1705 (2.98)     2,083.2020 (29.67)    69.4740 (5.02)         14;5     475.3176 (0.04)         82           1
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
```

Where `0001` is before this commit, and `0002` is after: mean time was reduced
from 2,103 µs to 405 µs (the table is in microseconds).

This comes with a tradeoff, reflected in the added tests: Dynamically added
attributes *will not be picked up*. For example:

```python
time_after_start = None

def add_after_start() -> None:
    # The imports are local, so `time` is only looked up when this function runs.
    import time
    import sys

    # Attach whatever `time.time` is at call time to this module under a new
    # attribute name, i.e. an attribute that did not exist when the module was
    # first inspected.
    setattr(sys.modules[__name__], 'dynamic_time_func', time.time)
```

Because the result of `dir` is cached, the `dynamic_time_func` is not picked
up.

I leave it up to the maintainers to decide if this tradeoff is worthwhile,
considering that there are some other existing blindspots (also shown in added
tests).

This commit does not include the benchmark itself - if desired, it can trivially
be added.
  • Loading branch information
Zirak committed Jan 4, 2025
1 parent 5f171db commit 94ecee6
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 24 deletions.
78 changes: 54 additions & 24 deletions freezegun/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,52 +98,82 @@


# keep a cache of module attributes otherwise freezegun will need to analyze too many modules all the time
_GLOBAL_MODULES_CACHE: Dict[str, Tuple[str, List[Tuple[str, Any]]]] = {}
_GLOBAL_MODULES_CACHE: Dict[int, Tuple[int, List[Tuple[str, Any]]]] = {}
# Cache for module time-related attributes to avoid repeated expensive dir() calls
# Unlike _GLOBAL_MODULES_CACHE, this only stores attribute *names*, not their values
_MODULE_TIME_ATTRS_CACHE: Dict[int, Set[str]] = {}


def _get_module_attributes(module: types.ModuleType) -> List[Tuple[str, Any]]:
result: List[Tuple[str, Any]] = []
# Returns the names of `module` attributes whose current value is one of the
# real time objects (matched by id), memoised per id(module).
# NOTE(review): this span appears to be a rendered diff hunk with indentation
# stripped; `module_attributes = dir(module)`, `return result` and
# `for attribute_name in module_attributes:` look like removed lines of the
# previous `_get_module_attributes` body - confirm against freezegun/api.py.
# NOTE(review): id() values may be reused after an object is freed, so an entry
# could be matched by a different module later - confirm whether modules are
# ever released while this cache is alive.
def _get_module_time_attributes(module: types.ModuleType) -> Set[str]:
"""Get time-related attributes for a module, using cache if possible."""
module_id = id(module)
cached_attrs = _MODULE_TIME_ATTRS_CACHE.get(module_id, None)

# Any cached set, including an empty one, is returned as-is (the same object,
# not a copy) and the module is not inspected again.
if cached_attrs is not None:
return cached_attrs

try:
module_attributes = dir(module)
module_dir = dir(module)

# Find attributes that match real time objects
time_attrs = set()
for attribute_name in module_dir:
try:
attribute_value = getattr(module, attribute_name)
if id(attribute_value) in _real_time_object_ids:
time_attrs.add(attribute_name)
except (ImportError, AttributeError, TypeError):
# Attributes whose lookup raises one of these are skipped.
continue

_MODULE_TIME_ATTRS_CACHE[module_id] = time_attrs
return time_attrs
except (ImportError, TypeError):
return result
for attribute_name in module_attributes:
# Reading the added lines: when dir() fails, an empty set is returned and
# nothing is stored in the cache.
return set()


# Resolves each cached time-attribute name back to its current value on
# `module` and returns the (name, value) pairs.
# The names come from a set, so the order of the returned list is not fixed.
def _get_module_attributes(module: types.ModuleType) -> List[Tuple[str, Any]]:
"""Get all time-related attributes from a module."""
result: List[Tuple[str, Any]] = []

time_attributes = _get_module_time_attributes(module)
for attribute_name in time_attributes:
try:
attribute_value = getattr(module, attribute_name)
except (ImportError, AttributeError, TypeError):
# For certain libraries, this can result in ImportError(_winreg) or AttributeError (celery)
continue
else:
# The value is taken as found; it is not re-checked against
# _real_time_object_ids here.
result.append((attribute_name, attribute_value))
return result


# NOTE(review): appears to be the pre-change implementation shown by the diff
# (a second definition with the same name follows) - confirm.
# Keeps only the attributes whose id is in _real_time_object_ids and stores
# them, together with the module's attribute hash, under module.__name__.
def _setup_module_cache(module: types.ModuleType) -> None:
date_attrs = []
all_module_attributes = _get_module_attributes(module)
for attribute_name, attribute_value in all_module_attributes:
if id(attribute_value) in _real_time_object_ids:
date_attrs.append((attribute_name, attribute_value))
_GLOBAL_MODULES_CACHE[module.__name__] = (_get_module_attributes_hash(module), date_attrs)
# Returns a (hash, attributes) tuple, not just a hash: the attributes are the
# (name, value) pairs from _get_module_attributes and the hash is computed from
# the attribute names only, so a rebound value does not change it.
def _get_module_attributes_hash(module: types.ModuleType) -> Tuple[int, List[Tuple[str, Any]]]:
"""Get a hash of module's time-related attributes."""
module_attrs = _get_module_attributes(module)

# A module without time-related attributes always yields (0, []).
if not module_attrs:
return 0, []

module_hash = hash(frozenset(name for name, _ in module_attrs))
return module_hash, module_attrs

# NOTE(review): appears to be the pre-change implementation shown by the diff
# (another definition with the same name precedes it) - confirm.
# Builds the string "<id(module)>-<hash of frozenset(dir(module))>"; a dir()
# call that raises ImportError or TypeError is treated as an empty listing.
def _get_module_attributes_hash(module: types.ModuleType) -> str:
try:
module_dir = dir(module)
except (ImportError, TypeError):
module_dir = []
return f'{id(module)}-{hash(frozenset(module_dir))}'

# Recomputes the module's (hash, attributes) pair, stores it in
# _GLOBAL_MODULES_CACHE under id(module), and returns the attributes.
def _setup_module_cache(module: types.ModuleType) -> List[Tuple[str, Any]]:
module_hash, module_attrs = _get_module_attributes_hash(module)
_GLOBAL_MODULES_CACHE[id(module)] = module_hash, module_attrs
return module_attrs


# Returns the cached (name, value) time attributes for `module`, refreshing the
# cache entry when the stored hash no longer matches the current one.
# NOTE(review): this span appears to be a rendered diff hunk with indentation
# stripped; the lines using module.__name__ and the bare
# `_setup_module_cache(module)` call look like removed lines - confirm against
# freezegun/api.py.
def _get_cached_module_attributes(module: types.ModuleType) -> List[Tuple[str, Any]]:
module_hash, cached_attrs = _GLOBAL_MODULES_CACHE.get(module.__name__, ('0', []))
if _get_module_attributes_hash(module) == module_hash:
module_id = id(module)
# The default (0, []) equals what _get_module_attributes_hash returns for a
# module without time attributes, so such a module is treated as a hit and
# yields the default empty list without being stored.
module_hash, cached_attrs = _GLOBAL_MODULES_CACHE.get(module_id, (0, []))

current_module_hash, _ = _get_module_attributes_hash(module)
if current_module_hash == module_hash:
return cached_attrs

# cache miss: update the cache and return the refreshed value
_setup_module_cache(module)
# NOTE(review): _setup_module_cache calls _get_module_attributes_hash again, so
# on a miss the attributes are resolved twice.
cached_attrs = _setup_module_cache(module)
# return the newly cached value
module_hash, cached_attrs = _GLOBAL_MODULES_CACHE[module.__name__]
return cached_attrs


Expand Down
13 changes: 13 additions & 0 deletions tests/dynamic_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""A module that delays importing `time` until after it's convenient for
freezegun"""

time_after_start = None

# Mutates this module at call time: rebinds the global `time_after_start` and
# adds two attributes that do not exist at import time.
def add_after_start() -> None:
import time
import sys

global time_after_start
# A float: the value returned by time.time() at the moment of the call.
time_after_start = time.time()
# `dynamic_time` is likewise a float result; `dynamic_time_func` is the
# `time.time` function object itself.
setattr(sys.modules[__name__], 'dynamic_time', time.time())
setattr(sys.modules[__name__], 'dynamic_time_func', time.time)
29 changes: 29 additions & 0 deletions tests/test_class_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,36 @@ def test_import_after_start() -> None:
assert another_module.get_fake_strftime() is fake_strftime
del sys.modules['tests.another_module']


# Starts a freeze without an explicit time, moves it to 2012-01-14, and checks
# that fake_strftime_function() then returns '2012'.
def test_none_as_initial() -> None:
with freeze_time() as ft:
ft.move_to('2012-01-14')
assert fake_strftime_function() == '2012'


# Documents what freezegun does NOT patch in a module that is already cached
# and is mutated between two freezes. Every assertion uses `!=`, i.e. it pins
# the value as not frozen.
def test_dynamic_module_reimported() -> None:
local_time = datetime.datetime(2012, 1, 14)
utc_time = local_time - datetime.timedelta(seconds=time.timezone)
expected_timestamp = time.mktime(utc_time.timetuple())

from . import dynamic_module
# Don't do anything else - just make sure it's in the cache
with freeze_time("2012-01-14"):
pass

# Now, load the module again. When we call freeze_time again it will be in
# the cache and we can start testing it
import importlib
importlib.reload(dynamic_module)

# Mutate the module to show the caching & invalidation
# (this runs outside any freeze_time block, so the stored floats come from
# the clock as it is between the two freezes)
dynamic_module.add_after_start()

with freeze_time("2012-01-14"):
# This is NOT good - but was the previous behaviour of freezegun
assert dynamic_module.time_after_start != expected_timestamp
assert dynamic_module.dynamic_time != expected_timestamp # type: ignore
# This is NEW broken behaviour - dynamic_time_func is an attribute that
# is dynamically added to the module, and is NOT picked up by the
# caching mechanism
assert dynamic_module.dynamic_time_func() != expected_timestamp # type: ignore

0 comments on commit 94ecee6

Please sign in to comment.