cvat-ai · zhiltsov-max · Aug 6, 2024 · Jul 25, 2024 · Jul 25, 2024 · Jul 26, 2024
@@ -0,0 +1,254 @@
+from functools import wraps
+from itertools import islice
+from typing import Any
+from typing import Callable
+from typing import Iterator
+
+from typing import TypeVar
+from typing import overload
+
+
+T = TypeVar("T", bound=int | float | str)
+
+
+def _parse_self_and_other_before_accessing(list_method: Callable[..., Any]) -> Callable[..., Any]:
+    @wraps(list_method)
+    def wrapper(self: 'LazyList', other: Any) -> 'LazyList':
+        self._parse_up_to(-1)
+        if isinstance(other, LazyList):
+            other._parse_up_to(-1)
+        if not isinstance(other, list):
+            # explicitly calling list.__add__ with
+            # np.ndarray raises TypeError instead of it returning NotImplemented
+            # this prevents python from executing np.ndarray.__radd__
+            return NotImplemented
+
+        return list_method(self, other)
+
+    return wrapper
+
+
+def _parse_self_before_accessing(list_method: Callable[..., Any]) -> Callable[..., Any]:
+    """Wrapper for original list methods. Forces LazyList to parse itself before accessing them."""
+    @wraps(list_method)
+    def wrapper(self: 'LazyList', *args, **kwargs) -> 'LazyList':
+        self._parse_up_to(-1)
+
+        return list_method(self, *args, **kwargs)
+
+    return wrapper
+
+
+class LazyListMeta(type):
+    def __new__(
+        mcs,
+        name: str,
+        bases: tuple[type, ...],
+        namespace: dict[str, Any],
+    ):
+        # add pre-parse for list methods
+        for method_name in [
+            "append",
+            "copy",
+            "insert",
+            "pop",
+            "remove",
+            "reverse",
+            "sort",
+            "clear",
+            "index",
+            "count",
+            "__setitem__",
+            "__delitem__",
+            "__contains__",
+            "__len__",
+            "__reversed__",
+            "__mul__",
+            "__rmul__",
+            "__imul__",
+        ]:
+            namespace[method_name] = _parse_self_before_accessing(
+                getattr(list, method_name)
+            )
+
+        for method_name in [
+            "extend",
+            "__add__",
+            "__iadd__",
+            "__eq__",
+            "__gt__",
+            "__ge__",
+            "__lt__",
+            "__le__",
+        ]:
+            namespace[method_name] = _parse_self_and_other_before_accessing(
+                getattr(list, method_name)
+            )
+
+        return super().__new__(mcs, name, bases, namespace)
+
+
+class LazyList(list[T], metaclass=LazyListMeta):
+    """
+    Evaluates elements from the string representation as needed.
+    Lazy evaluation is supported for __getitem__ and __iter__ methods.
+    Using any other method will result in parsing the whole string.
+    Once instance of LazyList is fully parsed (either by accessing list methods
+    or by iterating over all elements), it will behave just as a regular python list.
+    """
+    __slots__ = ("_string", "_separator", "_converter", "_probable_length", "_parsed")
+
+    def __init__(self, string: str = "", separator: str = ",", converter: Callable[[str], T] = lambda s: s) -> None:
+        super().__init__()
+        self._string = string
+        self._separator = separator
+        self._converter = converter
+        self._probable_length: int | None = None
+        self._parsed: bool = False
+
+    def __repr__(self) -> str:
+        if self._parsed:
+            return f"LazyList({list.__repr__(self)})"
+        current_index = list.__len__(self)
+        current_position = 1 if self._string.startswith('[') else 0
+        separator_offset = len(self._separator)
+
+        for _ in range(current_index):
+            current_position = self._string.find(self._separator, current_position) + separator_offset
+
+        parsed_elements = list.__repr__(self).removesuffix("]")
+        unparsed_elements = self._string[current_position:]
+        return (
+            f"LazyList({parsed_elements}... + {unparsed_elements}', "
+            f"({list.__len__(self) / self._compute_max_length(self._string) * 100:.02f}% parsed))"
+        )
+
+    def __deepcopy__(self, memodict: Any = None) -> list[T]:
+        """
+        Since our elements are scalar, this should be sufficient
+        Without this, deepcopy would copy the state of the object,
+        then would try to append its elements.
+
+        However, since copy will contain initial string,
+        it will compute its elements on the first on the first append,
+        resulting in value duplication.
+        """
+        return list(self)
+
+    @overload
+    def __getitem__(self, index: int) -> T: ...
+
+    @overload
+    def __getitem__(self, index: slice) -> list[T]: ...
+
+    def __getitem__(self, index: int | slice) -> T | list[T]:
+        if self._parsed:
+            return list.__getitem__(self, index)
+
+        if isinstance(index, slice):
+            self._parse_up_to(index.indices(self._compute_max_length(self._string))[1] - 1)
+            return list.__getitem__(self, index)
+
+        self._parse_up_to(index)
+        return list.__getitem__(self, index)
+
+    def __iter__(self) -> Iterator[T]:
+        yield from list.__iter__(self)
+        yield from self._iter_unparsed()
+
+    def __str__(self) -> str:
+        if not self._parsed:
+            return self._string.strip("[]")
+        return self._separator.join(map(str, self))
+
+    def _parse_up_to(self, index: int) -> None:
+        if self._parsed:
+            return
+
+        if index < 0:
+            index += self._compute_max_length(self._string)
+
+        start = list.__len__(self)
+        if start > index:
+            return
+        end = index - start + 1
+        for _ in islice(self._iter_unparsed(), end + 1):
+            pass
+
+        if index == self._compute_max_length(self._string) - 1:
+            self._mark_parsed()
+
+    def _mark_parsed(self):
+        self._parsed = True
+        self._string = ""  # freeing the memory
+
+    def _iter_unparsed(self):
+        if self._parsed:
+            return
+        string = self._string
+        current_index = list.__len__(self)
+        current_position = 1 if string.startswith('[') else 0
+        string_length = len(string) - 1 if string.endswith(']') else len(string)
+        separator_offset = len(self._separator)
+
+        for _ in range(current_index):
+            current_position = string.find(self._separator, current_position) + separator_offset
+
+        while current_index < self._compute_max_length(string):
+            end = string.find(self._separator, current_position, string_length)
+            if end == -1:
+                end = string_length
+                self._mark_parsed()
+
+            element_str = string[current_position:end]
+            current_position = end + separator_offset
+            if not element_str:
+                self._probable_length -= 1
+                continue
+            element = self._converter(element_str)
+            if list.__len__(self) <= current_index:
+                # We need to handle special case when instance of lazy list becomes parsed after
+                # this function is called:
+                # ll = LazyList("1,2,3", _converter=int)
+                # iterator = iter(ll)
+                # next(iterator)  # > 1 (will generate next element and append to self)
+                # list(ll)  # > [1, 2, 3]
+                # next(iterator)  # > 2 (will generate next element, however will not append it)
+                # assert list(ll) == [1, 2, 3]
+                list.append(self, element)
+            yield element
+            current_index += 1
+
+    def _compute_max_length(self, string) -> int:
+        if self._probable_length is None:
+            if not self._string:
+                return 0
+            self._probable_length = string.count(self._separator) + 1
+        return self._probable_length
+
+    # support pickling
+
+    def __reduce__(self):
+        return self.__class__, (self._string, self._separator, self._converter), self.__getstate__()
+
+    def __reduce_ex__(self, protocol: int):
+        return self.__reduce__()
+
+    def __getstate__(self):
+        return {
+            'string': self._string,
+            '_separator': self._separator,
+            '_converter': self._converter,
+            '_probable_length': self._probable_length,
+            'parsed': self._parsed,
+            'parsed_elements': list(self) if self._parsed else None
+        }
+
+    def __setstate__(self, state):
+        self._string = state['string']
+        self._separator = state['_separator']
+        self._converter = state['_converter']
+        self._probable_length = state['_probable_length']
+        self._parsed = state['parsed']
+        if self._parsed:
+            self.extend(state['parsed_elements'])
@@ -22,6 +22,7 @@
 from django.db.models import Q
 from drf_spectacular.types import OpenApiTypes
 from drf_spectacular.utils import extend_schema_field
+from cvat.apps.engine.lazy_list import LazyList
 
 from cvat.apps.engine.utils import parse_specific_attributes
 from cvat.apps.events.utils import cache_deleted
@@ -181,6 +182,7 @@ def choices(cls):
     def __str__(self):
         return self.value
 
+
 class AbstractArrayField(models.TextField):
     separator = ","
     converter = staticmethod(lambda x: x)
@@ -193,19 +195,20 @@ def __init__(self, *args, store_sorted:Optional[bool]=False, unique_values:Optio
     def from_db_value(self, value, expression, connection):
         if not value:
             return []
-        if value.startswith('[') and value.endswith(']'):
-            value = value[1:-1]
-        return [self.converter(v) for v in value.split(self.separator) if v]
+        return LazyList(string=value, separator=self.separator, converter=self.converter)
 
     def to_python(self, value):
-        if isinstance(value, list):
+        if isinstance(value, list | LazyList):
             return value
 
         return self.from_db_value(value, None, None)
 
     def get_prep_value(self, value):
+        if isinstance(value, LazyList) and not (self._unique_values or self._store_sorted):
+            return str(value)
+
         if self._unique_values:
-            value = list(dict.fromkeys(value))
+            value = dict.fromkeys(value)
         if self._store_sorted:
             value = sorted(value)
         return self.separator.join(map(str, value))