Skip to content

Commit

Permalink
Fix several bugs with LazyList
Browse files Browse the repository at this point in the history
1. Support pickling
2. Handle cases where LazyList is extended with/added to another LazyList
3. Fix incorrect results when an unparsed LazyList was being compared
4. Fix cases when deep copying resulted in duplicate values
  • Loading branch information
Bobronium committed Jul 29, 2024
1 parent 078669e commit 2a68e50
Show file tree
Hide file tree
Showing 2 changed files with 325 additions and 71 deletions.
230 changes: 159 additions & 71 deletions cvat/apps/engine/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# SPDX-License-Identifier: MIT

from __future__ import annotations
from dataclasses import dataclass, field

from itertools import islice

import os
Expand Down Expand Up @@ -187,29 +187,34 @@ def __str__(self):
T = TypeVar("T")


def _parse_both_before_accessing(fn):
@wraps(fn)
def wrapper(self: 'LazyList', other: Any) -> 'LazyList':
self._parse_up_to(-1)
if not isinstance(other, list):
# explicitly calling list.__add__ with
# np.ndarray raises TypeError instead of it returning NotImplemented
# this prevents python from executing np.ndarray.__radd__
return NotImplemented
if isinstance(other, LazyList):
other._parse_up_to(-1)

return fn(self, other)

return wrapper


def _parse_before_accessing(fn: Callable[..., Any]) -> Callable[..., Any]:
"""Wrapper for original list methods. Forces LazyList to parse itself before accessing them."""
if fn.__name__ in {"__add__", "__mul__"}:
@wraps(fn)
def wrapper(self: 'LazyList', other):
self._parse_up_to(-1)
if not isinstance(other, list):
# explicitly calling list.__add__ with
# np.ndarray raises TypeError instead of it returning NotImplemented
# this prevents python from executing np.ndarray.__radd__
return NotImplemented
return fn(self, other)
else:
@wraps(fn)
def wrapper(self: 'LazyList', *args, **kwargs) -> 'LazyList':
self._parse_up_to(-1)

return fn(self, *args, **kwargs)
@wraps(fn)
def wrapper(self: 'LazyList', *args, **kwargs) -> 'LazyList':
self._parse_up_to(-1)

return fn(self, *args, **kwargs)

return wrapper


@dataclass(slots=True)
class LazyList(list[T]):
"""
Evaluates elements from the string representation as needed.
Expand All @@ -218,43 +223,48 @@ class LazyList(list[T]):
Once instance of LazyList is fully parsed (either by accessing list methods
or by iterating over all elements), it will behave just as a regular python list.
"""
__slots__ = ("string", "_separator", "_converter", "_probable_length", "parsed")
string: str
_separator: str
_converter: Callable[[str], T]
_probable_length: int | None = field(init=False, default=None)
parsed: bool = field(init=False, default=False)
_probable_length: int | None

# Rebinds each listed ``list`` method in the enclosing namespace to a
# version wrapped by ``_parse_before_accessing``.
# NOTE(review): every name bound here is bound again further down in this
# file (the explicit ``def __repr__`` and the two later wrapper loops), so
# this loop looks like the pre-change copy of that logic — confirm whether
# it is still needed.
for method in [
    "append",
    "copy",
    "extend",
    "insert",
    "pop",
    "remove",
    "reverse",
    "sort",
    "clear",
    "__setitem__",
    "__delitem__",
    "__eq__",
    "__contains__",
    "__len__",
    "__add__",
    "__iadd__",
    "__mul__",
    "__rmul__",
    "__imul__",
    "__contains__",
    "__reversed__",
    "__gt__",
    "__ge__",
    "__lt__",
    "__le__",
    "__eq__",
    "__repr__",
    "__len__",
]:
    locals()[method] = _parse_before_accessing(getattr(list, method))
def __init__(self, string: str = "", separator: str = ",", converter: Callable[[str], T] = lambda s: s) -> None:
super().__init__()
self.string = string
self._separator = separator
self._converter = converter
self._probable_length = None
self.parsed = False

def __repr__(self) -> str:
if self.parsed:
return f"LazyList({list.__repr__(self)})"
current_index = list.__len__(self)
current_position = 1 if self.string.startswith('[') else 0
separator_offset = len(self._separator)

for _ in range(current_index):
current_position = self.string.find(self._separator, current_position) + separator_offset

parsed_elements = list.__repr__(self).removesuffix("]")
unparsed_elements = self.string[current_position:]
return (
f"LazyList({parsed_elements}... + {unparsed_elements}', "
f"({list.__len__(self) / self._compute_max_length(self.string) * 100:.02f}% parsed))"
)

def __deepcopy__(self, memodict: Any = None) -> list[T]:
    """
    Return a plain ``list`` built from this instance's elements.

    Since our elements are scalar, this should be sufficient.
    Without this, deepcopy would copy the state of the object,
    then would try to append its elements.
    However, since the copy will contain the initial string,
    it will compute its elements on the first append,
    resulting in value duplication.
    """
    return list(self)

@overload
def __getitem__(self, index: int) -> T: ...
Expand All @@ -267,7 +277,7 @@ def __getitem__(self, index: int | slice) -> T | list[T]:
return list.__getitem__(self, index)

if isinstance(index, slice):
self._parse_up_to(index.indices(self._compute_max_length())[1] - 1)
self._parse_up_to(index.indices(self._compute_max_length(self.string))[1] - 1)
return list.__getitem__(self, index)

self._parse_up_to(index)
Expand All @@ -282,54 +292,132 @@ def _parse_up_to(self, index: int) -> None:
return

if index < 0:
index += self._compute_max_length()
if index < 0 or index >= self._compute_max_length():
raise IndexError('Index out of range')
index += self._compute_max_length(self.string)

start = list.__len__(self)
if start > index:
return

end = index - start + 1
for _ in islice(self._iter_unparsed(), end):
for _ in islice(self._iter_unparsed(), end + 1):
pass

if index == self._compute_max_length() - 1:
self.parsed = True
self.string = "" # freeing the memory
if index == self._compute_max_length(self.string) - 1:
self._mark_parsed()

def _mark_parsed(self):
self.parsed = True
self.string = "" # freeing the memory

def _iter_unparsed(self):
if self.parsed:
return
string = self.string
current_index = list.__len__(self)
current_position = 1 if self.string.startswith('[') else 0
string_length = len(self.string) - 1 if self.string.endswith(']') else len(self.string)
current_position = 1 if string.startswith('[') else 0
string_length = len(string) - 1 if string.endswith(']') else len(string)
separator_offset = len(self._separator)

for _ in range(current_index):
current_position = self.string.find(self._separator, current_position) + separator_offset
current_position = string.find(self._separator, current_position) + separator_offset

while current_index < self._compute_max_length():
end = self.string.find(self._separator, current_position, string_length)
while current_index < self._compute_max_length(string):
end = string.find(self._separator, current_position, string_length)
if end == -1:
end = string_length
self.parsed = True
self._mark_parsed()

element_str = self.string[current_position:end]
element_str = string[current_position:end]
current_position = end + separator_offset
if not element_str:
self._probable_length -= 1
continue
element = self._converter(element_str)
list.append(self, element)
if list.__len__(self) <= current_index:
# We need to handle special case when instance of lazy list becomes parsed after
# this function is called:
# ll = LazyList("1,2,3", _converter=int)
# iterator = iter(ll)
# next(iterator) # > 1 (will generate next element and append to self)
# list(ll) # > [1, 2, 3]
# next(iterator) # > 2 (will generate next element, however will not append it)
# assert list(ll) == [1, 2, 3]
list.append(self, element)
yield element
current_index += 1

def _compute_max_length(self) -> int:
def _compute_max_length(self, string) -> int:
if self._probable_length is None:
self._probable_length = self.string.count(self._separator) + 1
if not self.string:
return 0
self._probable_length = string.count(self._separator) + 1
return self._probable_length

# support pickling

def __reduce__(self):
return self.__class__, (self.string, self._separator, self._converter), self.__getstate__()

def __reduce_ex__(self, protocol: int):
    """Delegate to ``__reduce__``; the ``protocol`` argument is not used."""
    return self.__reduce__()

def __getstate__(self):
return {
'string': self.string,
'_separator': self._separator,
'_converter': self._converter,
'_probable_length': self._probable_length,
'parsed': self.parsed,
'parsed_elements': list(self) if self.parsed else None
}

def __setstate__(self, state):
self.string = state['string']
self._separator = state['_separator']
self._converter = state['_converter']
self._probable_length = state['_probable_length']
self.parsed = state['parsed']
if self.parsed:
self.extend(state['parsed_elements'])

# add pre-parse for list methods

# Methods that only need this instance to be fully parsed.
for method in (
    "append",
    "copy",
    "insert",
    "pop",
    "remove",
    "reverse",
    "sort",
    "clear",
    "index",
    "count",
    "__setitem__",
    "__delitem__",
    "__contains__",
    "__len__",
    "__reversed__",
    "__mul__",
    "__rmul__",
    "__imul__",
):
    locals()[method] = _parse_before_accessing(getattr(list, method))

# Methods taking another list: both operands must be parsed.
for method in (
    "extend",
    "__add__",
    "__eq__",
    "__iadd__",
    "__gt__",
    "__ge__",
    "__lt__",
    "__le__",
):
    locals()[method] = _parse_both_before_accessing(getattr(list, method))

# Remove the loop variable so it does not linger in the namespace.
del method


class AbstractArrayField(models.TextField):
separator = ","
Expand All @@ -343,7 +431,7 @@ def __init__(self, *args, store_sorted:Optional[bool]=False, unique_values:Optio
def from_db_value(self, value, expression, connection):
if not value:
return []
return LazyList(value, self.separator, self.converter)
return LazyList(string=value, separator=self.separator, converter=self.converter)

def to_python(self, value):
if isinstance(value, list | LazyList):
Expand Down
Loading

0 comments on commit 2a68e50

Please sign in to comment.