diff --git a/robocorp-python-ls-core/src/robocorp_ls_core/cache.py b/robocorp-python-ls-core/src/robocorp_ls_core/cache.py
index 6c809ea9dd..b6ba0191a4 100644
--- a/robocorp-python-ls-core/src/robocorp_ls_core/cache.py
+++ b/robocorp-python-ls-core/src/robocorp_ls_core/cache.py
@@ -1,13 +1,14 @@
 from robocorp_ls_core.basic import implements
 from robocorp_ls_core.constants import NULL
-from robocorp_ls_core.protocols import IDirCache, check_implements, T
+from robocorp_ls_core.protocols import IDirCache, check_implements, T, Sentinel
 from robocorp_ls_core.robotframework_log import get_logger
 from collections import namedtuple
 from pathlib import Path
-from typing import Any, Generic, Callable, Optional
+from typing import Any, Generic, Callable, Optional, TypeVar
 import functools
 import os
+import collections
 
 
 log = get_logger(__name__)
 
@@ -39,8 +40,6 @@ def cache_this(self):
 
     @functools.wraps(func)
     def new_func(self, *args, **kwargs):
-        from robocorp_ls_core.protocols import Sentinel
-
         try:
            cache = getattr(self, "__instance_cache__")
         except:
@@ -207,3 +206,111 @@ def _get_mtime_cache_info(self, file_path: Path) -> Optional[CachedFileMTimeInfo
 
     def is_cache_valid(self) -> bool:
         return self._mtime_info == self._get_mtime_cache_info(self.file_path)
+
+
+KT = TypeVar("KT")  # Key type.
+VT = TypeVar("VT")  # Value type.
+
+
+class LRUCache(Generic[KT, VT]):
+    """
+    An LRU cache with a maximum size and a target size to shrink to when that
+    maximum would be exceeded.
+
+    It's possible to customize the maximum size of the cache and to give a
+    custom weight for each entry.
+
+    Note that the capacity is not exact: it may be exceeded when an item is
+    added and resize_to + item_size is greater than max_size.
+
+    This cache is NOT thread safe (users must lock if needed).
+
+    Internally it's an ordered dict where each key points to a list with
+    [value, entry_size] (the dict order tracks the access recency).
+    """
+
+    def __init__(
+        self,
+        max_size: int = 100,
+        resize_to: Optional[int] = None,
+        get_size: Callable[[Any], int] = lambda obj: 1,
+    ):
+        assert max_size >= 1
+        self.max_size = max_size
+
+        if resize_to is None:
+            resize_to = int(max_size * 0.7)
+        assert resize_to < max_size
+
+        self.resize_to = resize_to
+        self._get_size = get_size
+
+        self._dict: Any = collections.OrderedDict()
+        self._current_size_usage = 0
+
+    def clear(self) -> None:
+        self._current_size_usage = 0
+        self._dict.clear()
+
+    @property
+    def current_size_usage(self) -> int:
+        return self._current_size_usage
+
+    def __getitem__(self, key: KT) -> VT:
+        item = self._dict[key]
+        self._dict.move_to_end(key)
+        return item[0]
+
+    def __contains__(self, key: KT) -> bool:
+        try:
+            self[key]
+            return True
+        except KeyError:
+            return False
+
+    def __len__(self) -> int:
+        return len(self._dict)
+
+    def __delitem__(self, key: KT) -> None:
+        entry = self._dict.pop(key)
+        self._current_size_usage -= entry[-1]
+
+    def pop(self, key: KT, default=Sentinel.SENTINEL) -> Optional[Any]:
+        try:
+            entry = self._dict.pop(key)
+            self._current_size_usage -= entry[-1]
+            return entry[0]
+        except KeyError:
+            if default is not Sentinel.SENTINEL:
+                return default
+            raise
+
+    def __setitem__(self, key: KT, value: VT) -> None:
+        item = self._dict.get(key)
+        if item is None:
+            new_size = self._get_size(value)
+
+            if self._current_size_usage + new_size > self.max_size:
+                # Note that we may exceed the cache size because we'll resize
+                # to the target value without accounting for the size of
+                # the new entry (and that's ok for this use case).
+                self._resize_to()
+
+            self._current_size_usage += new_size
+            item = [value, new_size]
+            self._dict[key] = item
+        else:
+            item[0] = value
+        self._dict.move_to_end(key)
+
+    def get(self, key: KT, default: Optional[Any] = None) -> Optional[Any]:
+        try:
+            return self[key]
+        except KeyError:
+            return default
+
+    def _resize_to(self) -> None:
+        while self._current_size_usage > self.resize_to:
+            _key, entry = self._dict.popitem(last=False)
+            self._current_size_usage -= entry[-1]
diff --git a/robocorp-python-ls-core/src/robocorp_ls_core/uris.py b/robocorp-python-ls-core/src/robocorp_ls_core/uris.py
index 69eb1398ac..bc8bf0a1e2 100644
--- a/robocorp-python-ls-core/src/robocorp_ls_core/uris.py
+++ b/robocorp-python-ls-core/src/robocorp_ls_core/uris.py
@@ -81,7 +81,7 @@ def _normalize_win_path(path):
     return path, netloc
 
 
-@lru_cache(200)
+@lru_cache(500)
 def from_fs_path(path: str) -> str:
     """Returns a URI for the given filesystem path."""
     scheme = "file"
@@ -90,14 +90,14 @@ def from_fs_path(path: str) -> str:
     return urlunparse((scheme, netloc, path, params, query, fragment))
 
 
-@lru_cache(200)
+@lru_cache(500)
 def normalize_uri(uri: str) -> str:
     if uri_scheme(uri) == "file":
         return from_fs_path(to_fs_path(uri))
     return uri
 
 
-@lru_cache(200)
+@lru_cache(500)
 def to_fs_path(uri: str) -> str:
     """Returns the filesystem path of the given URI.
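Editorial note (not part of the patch): a minimal usage sketch of the LRUCache added above, with made-up keys and weights, to show the eviction behavior described in its docstring.

from robocorp_ls_core.cache import LRUCache

# max_size=100 with the default resize_to of int(100 * 0.7) == 70; each entry
# is weighted by the given get_size callable.
cache: LRUCache[str, str] = LRUCache(max_size=100, get_size=lambda s: len(s))

cache["a"] = "x" * 60
cache["b"] = "y" * 30  # usage is now 90: over resize_to, but still under max_size
cache["c"] = "z" * 20  # 90 + 20 > 100: oldest entries are evicted until usage <= 70

assert "a" not in cache  # the least recently used entry was dropped
assert cache.current_size_usage == 30 + 20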
diff --git a/robocorp-python-ls-core/src/robocorp_ls_core/workspace.py b/robocorp-python-ls-core/src/robocorp_ls_core/workspace.py
index 5cd756c12a..f6ad27374f 100644
--- a/robocorp-python-ls-core/src/robocorp_ls_core/workspace.py
+++ b/robocorp-python-ls-core/src/robocorp_ls_core/workspace.py
@@ -323,6 +323,7 @@ def __init__(
     ) -> None:
         from robocorp_ls_core.lsp import WorkspaceFolder
         from robocorp_ls_core.callbacks import Callback
+        from robocorp_ls_core.cache import LRUCache
 
         self._main_thread = threading.current_thread()
 
@@ -337,7 +338,36 @@ def __init__(
         self._docs: Dict[str, IDocument] = {}
 
         # Contains the docs pointing to the filesystem.
-        self._filesystem_docs: Dict[str, IDocument] = {}
+        def _get_size(doc: IDocument):
+            # Simplistic heuristic: assume each char in a document occupies
+            # 8 bytes, then multiply by 7.5 to account for the memory used
+            # by the AST which is cached along with the document.
+            return int(8 * len(doc.source) * 7.5)
+
+        one_gb_in_bytes = int(1e9)
+        target_memory_in_bytes: int = one_gb_in_bytes  # Default value
+
+        target_memory_in_bytes_str = os.environ.get(
+            "RFLS_FILES_TARGET_MEMORY_IN_BYTES", None
+        )
+        if target_memory_in_bytes_str:
+            try:
+                target_memory_in_bytes = int(target_memory_in_bytes_str)
+            except:
+                log.critical(
+                    "Expected RFLS_FILES_TARGET_MEMORY_IN_BYTES to be an int. Found: %s",
+                    target_memory_in_bytes_str,
+                )
+
+        fifty_mb_in_bytes = int(5e7)
+        if target_memory_in_bytes <= fifty_mb_in_bytes:
+            target_memory_in_bytes = fifty_mb_in_bytes
+
+        # By default, when the estimated usage reaches 1GB we evict entries
+        # until it drops to about 700MB.
+        self._filesystem_docs: LRUCache[str, IDocument] = LRUCache(
+            target_memory_in_bytes, get_size=_get_size
+        )
+
+        self._filesystem_docs_lock = threading.Lock()
 
         self.on_file_changed = Callback()
 
@@ -422,31 +452,31 @@ def get_folder_paths(self) -> List[str]:
 
     @implements(IWorkspace.get_document)
     def get_document(self, doc_uri: str, accept_from_file: bool) -> Optional[IDocument]:
-        # Ok, thread-safe (does not mutate the _docs dict -- contents in the _filesystem_docs
-        # may end up stale or we may have multiple loads when we wouldn't need,
-        # but that should be ok).
+        # Ok, thread-safe (does not mutate the _docs dict, so the GIL keeps us
+        # safe -- contents in _filesystem_docs need a lock though).
         doc = self._docs.get(normalize_uri(doc_uri))
         if doc is not None:
             return doc
 
         if accept_from_file:
-            doc = self._filesystem_docs.get(doc_uri)
-
-            if doc is not None:
-                if not doc.is_source_in_sync():
-                    self._filesystem_docs.pop(doc_uri, None)
-                    doc = None
-
-            if doc is None:
-                try:
-                    doc = self._create_document(doc_uri, force_load_source=True)
-                except:
-                    log.debug("Unable to load contents from: %s", doc_uri)
-                    # Unable to load contents: file does not exist.
-                    doc = None
-                else:
-                    doc.immutable = True
-                    self._filesystem_docs[doc_uri] = doc
+            with self._filesystem_docs_lock:
+                doc = self._filesystem_docs.get(doc_uri)
+
+                if doc is not None:
+                    if not doc.is_source_in_sync():
+                        self._filesystem_docs.pop(doc_uri, None)
+                        doc = None
+
+                if doc is None:
+                    try:
+                        doc = self._create_document(doc_uri, force_load_source=True)
+                    except:
+                        log.debug("Unable to load contents from: %s", doc_uri)
+                        # Unable to load contents: file does not exist.
+                        doc = None
+                    else:
+                        doc.immutable = True
+                        self._filesystem_docs[doc_uri] = doc
 
         return doc
 
@@ -473,7 +503,8 @@ def put_document(self, text_document: TextDocumentItem) -> IDocument:
             _source = doc.source
         except:
             doc.source = ""
-        self._filesystem_docs.pop(normalized_doc_uri, None)
+        with self._filesystem_docs_lock:
+            self._filesystem_docs.pop(normalized_doc_uri, None)
         return doc
 
     @implements(IWorkspace.remove_document)
@@ -528,7 +559,8 @@ def dispose(self):
             self.remove_folder(folder_uri)
 
         self._docs = {}
-        self._filesystem_docs = {}
+        with self._filesystem_docs_lock:
+            self._filesystem_docs.clear()
 
     def __typecheckself__(self) -> None:
         from robocorp_ls_core.protocols import check_implements
diff --git a/robocorp-python-ls-core/tests/robocorp_ls_core_tests/test_lru.py b/robocorp-python-ls-core/tests/robocorp_ls_core_tests/test_lru.py
new file mode 100644
index 0000000000..3005e5d67e
--- /dev/null
+++ b/robocorp-python-ls-core/tests/robocorp_ls_core_tests/test_lru.py
@@ -0,0 +1,80 @@
+import pytest
+
+
+class Item(object):
+    def __init__(self, size):
+        self.size = size
+
+
+def test_lru():
+    from robocorp_ls_core.cache import LRUCache
+
+    cache: LRUCache[int, Item] = LRUCache(100, get_size=lambda a: a.size)
+    assert cache.resize_to == 70
+    cache[1] = Item(45)
+
+    # Usage goes over resize_to here (45 + 40 = 85), but that's ok: entries
+    # are only evicted when max_size (100) would be exceeded.
+    cache[2] = Item(40)
+
+    assert len(cache) == 2
+    assert cache.current_size_usage == 40 + 45
+
+    # Now max_size would be exceeded; evicting the single oldest entry gets usage back under resize_to.
+    cache[3] = Item(39)
+
+    assert len(cache) == 2
+    assert 1 not in cache
+    assert cache.current_size_usage == 40 + 39
+
+    # Make it the last accessed in the LRU (so it's kept instead of the one
+    # with key == 3).
+    assert cache[2].size == 40
+
+    cache[4] = Item(38)
+    assert len(cache) == 2
+    assert 3 not in cache
+    assert 2 in cache
+    assert 4 in cache
+    assert cache.current_size_usage == 40 + 38
+
+    cache.clear()
+    assert cache.current_size_usage == 0
+    assert len(cache) == 0
+
+
+def test_lru_unitary_size():
+    from robocorp_ls_core.cache import LRUCache
+
+    cache = LRUCache(3, 1)
+    cache[1] = Item(1)
+    assert len(cache) == 1
+
+    cache[2] = Item(2)
+    assert len(cache) == 2
+
+    cache[3] = Item(3)
+    assert len(cache) == 3
+
+    cache[4] = Item(4)
+    # The cache was resized down to a single entry and then item 4 was added.
+    assert len(cache) == 2
+
+    assert cache.get(5) is None
+    assert cache.current_size_usage == 2
+
+    del cache[4]
+    assert cache.current_size_usage == 1
+    assert len(cache) == 1
+
+    with pytest.raises(KeyError):
+        del cache[4]
+
+    assert 3 in cache
+
+    item4 = cache[4] = Item(4)
+    assert cache.pop(4) is item4
+
+    with pytest.raises(KeyError):
+        cache.pop(4)
+    assert cache.pop(4, "foo") == "foo"
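Editorial note (not part of the patch): a sketch of how the document-memory budget introduced in workspace.py could be tuned; the 200 MB figure and the sample source are arbitrary.

import os

# Must be set before the Workspace is constructed; per the code above, values
# at or below 50 MB are clamped up to 50 MB, and the default budget is 1 GB.
os.environ["RFLS_FILES_TARGET_MEMORY_IN_BYTES"] = str(200 * 1000 * 1000)

# Per-document weight heuristic from the patch: 8 bytes per char, times 7.5
# to account for the AST cached alongside the document.
sample_source = "*** Test Cases ***\nExample\n    Log    message\n"
estimated_weight = int(8 * len(sample_source) * 7.5)  # roughly 2.8 KB for this tiny file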