From 6b604508160722c13f44a8f501da5240aff42a64 Mon Sep 17 00:00:00 2001 From: Fabio Zadrozny Date: Wed, 28 Jul 2021 10:28:57 -0300 Subject: [PATCH] Allow ignoring directories for file-changes. Fixes #398 --- .../src/robocorp_ls_core/load_ignored_dirs.py | 138 ++++++++++++++++++ .../src/robocorp_ls_core/watchdog_wrapper.py | 11 +- .../src/robocorp_ls_core/workspace.py | 12 +- .../test_remote_fs_observer.py | 89 +++++++++++ robotframework-ls/docs/faq.md | 8 + 5 files changed, 240 insertions(+), 18 deletions(-) create mode 100644 robocorp-python-ls-core/src/robocorp_ls_core/load_ignored_dirs.py diff --git a/robocorp-python-ls-core/src/robocorp_ls_core/load_ignored_dirs.py b/robocorp-python-ls-core/src/robocorp_ls_core/load_ignored_dirs.py new file mode 100644 index 0000000000..abac42f348 --- /dev/null +++ b/robocorp-python-ls-core/src/robocorp_ls_core/load_ignored_dirs.py @@ -0,0 +1,138 @@ +import os +from typing import Optional, Dict +import fnmatch +import glob + +from robocorp_ls_core.robotframework_log import get_logger + + +log = get_logger(__name__) + + +def _load_ignored_dirs_patterns(additional_dirs_to_ignore_str: Optional[str] = None): + ignored_dirs_patterns = set() + if additional_dirs_to_ignore_str is None: + additional_dirs_to_ignore_str = os.environ.get("ROBOTFRAMEWORK_LS_IGNORE_DIRS") + + if additional_dirs_to_ignore_str: + import json + + try: + additional_dirs_to_ignore = json.loads(additional_dirs_to_ignore_str) + except: + log.exception( + "Unable to load: %s (expected it to be a json list).", + additional_dirs_to_ignore_str, + ) + else: + if isinstance(additional_dirs_to_ignore, list): + for entry in additional_dirs_to_ignore: + if isinstance(entry, str): + ignored_dirs_patterns.add(entry) + else: + log.critical( + "Unable to load entry: %s from %s (because it's not a string).", + entry, + additional_dirs_to_ignore, + ) + else: + log.critical( + "Unable to load: %s (because it's not a list).", + additional_dirs_to_ignore_str, + ) + + return ignored_dirs_patterns + + +normcase = os.path.normcase + + +def _check_matches(patterns, paths): + if not patterns and not paths: + # Matched to the end. + return True + + if (not patterns and paths) or (patterns and not paths): + return False + + pattern = normcase(patterns[0]) + path = normcase(paths[0]) + + if not glob.has_magic(pattern): + + if pattern != path: + return False + + elif pattern == "**": + if len(patterns) == 1: + return True # if ** is the last one it matches anything to the right. + + for i in range(len(paths)): + # Recursively check the remaining patterns as the + # current pattern could match any number of paths. + if _check_matches(patterns[1:], paths[i:]): + return True + + elif not fnmatch.fnmatch(path, pattern): + # Current part doesn't match. + return False + + return _check_matches(patterns[1:], paths[1:]) + + +def glob_matches_path(path, pattern, sep=os.sep, altsep=os.altsep): + if altsep: + pattern = pattern.replace(altsep, sep) + path = path.replace(altsep, sep) + + drive = "" + if len(path) > 1 and path[1] == ":": + drive, path = path[0], path[2:] + + if drive and len(pattern) > 1: + if pattern[1] == ":": + if drive.lower() != pattern[0].lower(): + return False + pattern = pattern[2:] + + patterns = pattern.split(sep) + paths = path.split(sep) + if paths: + if paths[0] == "": + paths = paths[1:] + if patterns: + if patterns[0] == "": + patterns = patterns[1:] + + return _check_matches(patterns, paths) + + +def create_accept_directory_callable( + additional_dirs_to_ignore_str: Optional[str] = None +): + ignored_dirs = { + "**/.git", + "**/__pycache__", + "**/.idea", + "**/node_modules", + "**/.metadata", + "**/.vscode", + } + + ignored_dirs.update(_load_ignored_dirs_patterns(additional_dirs_to_ignore_str)) + + def accept_directory(dir_path: str, *, cache: Dict[str, bool] = {}): + try: + return cache[dir_path] + except KeyError: + for pattern in ignored_dirs: + if glob_matches_path(dir_path, pattern): + cache[dir_path] = False + log.debug("Directory untracked for changes: %s", dir_path) + return False + + log.debug("Directory tracked for changes: %s", dir_path) + cache[dir_path] = True + return True + + return accept_directory diff --git a/robocorp-python-ls-core/src/robocorp_ls_core/watchdog_wrapper.py b/robocorp-python-ls-core/src/robocorp_ls_core/watchdog_wrapper.py index fcce596fdf..8142dcb96f 100644 --- a/robocorp-python-ls-core/src/robocorp_ls_core/watchdog_wrapper.py +++ b/robocorp-python-ls-core/src/robocorp_ls_core/watchdog_wrapper.py @@ -217,6 +217,8 @@ def __typecheckself__(self) -> None: class _FSNotifyObserver(threading.Thread): def __init__(self, extensions): + from robocorp_ls_core import load_ignored_dirs + threading.Thread.__init__(self) import fsnotify @@ -246,14 +248,7 @@ def __init__(self, extensions): watcher.target_time_for_single_scan = poll_time watcher.accepted_file_extensions = extensions - # Could be customizable... - watcher.ignored_dirs = { - ".git", - "__pycache__", - ".idea", - "node_modules", - ".metadata", - } + watcher.accept_directory = load_ignored_dirs.create_accept_directory_callable() self._all_paths_to_track = [] self._lock = threading.Lock() diff --git a/robocorp-python-ls-core/src/robocorp_ls_core/workspace.py b/robocorp-python-ls-core/src/robocorp_ls_core/workspace.py index 5385cbe816..a9f40f9281 100644 --- a/robocorp-python-ls-core/src/robocorp_ls_core/workspace.py +++ b/robocorp-python-ls-core/src/robocorp_ls_core/workspace.py @@ -55,23 +55,15 @@ class _VirtualFSThread(threading.Thread): def __init__(self, virtual_fs): from robocorp_ls_core.watchdog_wrapper import IFSWatch + from robocorp_ls_core import load_ignored_dirs threading.Thread.__init__(self) self.daemon = True - from os.path import basename self._virtual_fs = weakref.ref(virtual_fs) self.root_folder_path = virtual_fs.root_folder_path - ignored_dirs = { - ".git", - "__pycache__", - ".idea", - "node_modules", - ".metadata", - ".vscode", - } - self.accept_directory = lambda dir_path: basename(dir_path) not in ignored_dirs + self.accept_directory = load_ignored_dirs.create_accept_directory_callable() self.accept_file = lambda path_name: path_name.endswith( tuple(virtual_fs._extensions) ) diff --git a/robocorp-python-ls-core/tests/robocorp_ls_core_tests/test_remote_fs_observer.py b/robocorp-python-ls-core/tests/robocorp_ls_core_tests/test_remote_fs_observer.py index 12209fba16..cdca453590 100644 --- a/robocorp-python-ls-core/tests/robocorp_ls_core_tests/test_remote_fs_observer.py +++ b/robocorp-python-ls-core/tests/robocorp_ls_core_tests/test_remote_fs_observer.py @@ -67,3 +67,92 @@ def check1(): watch.stop_tracking() notifier.dispose() observer.dispose() + + +def test_glob_matches_path(): + from robocorp_ls_core.load_ignored_dirs import glob_matches_path + import sys + + # Linux + for sep, altsep in (("\\", "/"), ("/", None)): + + def build(path): + if sep == "/": + return path + else: + return ("c:" + path).replace("/", "\\") + + assert glob_matches_path(build("/a"), r"*", sep, altsep) + + assert not glob_matches_path( + build("/a/b/c/some.py"), "/a/**/c/so?.py", sep, altsep + ) + + assert glob_matches_path("/a/b/c", "/a/b/*") + assert not glob_matches_path("/a/b", "/*") + assert glob_matches_path("/a/b", "/*/b") + assert glob_matches_path("/a/b", "**/*") + assert not glob_matches_path("/a/b", "**/a") + + assert glob_matches_path(build("/a/b/c/d"), "**/d", sep, altsep) + assert not glob_matches_path(build("/a/b/c/d"), "**/c", sep, altsep) + assert glob_matches_path(build("/a/b/c/d"), "**/c/d", sep, altsep) + assert glob_matches_path(build("/a/b/c/d"), "**/b/c/d", sep, altsep) + assert glob_matches_path(build("/a/b/c/d"), "/*/b/*/d", sep, altsep) + assert glob_matches_path(build("/a/b/c/d"), "**/c/*", sep, altsep) + assert glob_matches_path(build("/a/b/c/d"), "/a/**/c/*", sep, altsep) + + # I.e. directories are expected to end with '/', so, it'll match + # something as **/directory/** + assert glob_matches_path(build("/a/b/c/"), "**/c/**", sep, altsep) + assert glob_matches_path(build("/a/b/c/"), "**/c/", sep, altsep) + # But not something as **/directory (that'd be a file match). + assert not glob_matches_path(build("/a/b/c/"), "**/c", sep, altsep) + assert not glob_matches_path(build("/a/b/c"), "**/c/", sep, altsep) + + assert glob_matches_path(build("/a/b/c/d.py"), "/a/**/c/*", sep, altsep) + assert glob_matches_path(build("/a/b/c/d.py"), "/a/**/c/*.py", sep, altsep) + assert glob_matches_path(build("/a/b/c/some.py"), "/a/**/c/so*.py", sep, altsep) + assert glob_matches_path( + build("/a/b/c/some.py"), "/a/**/c/som?.py", sep, altsep + ) + assert glob_matches_path(build("/a/b/c/d"), "/**", sep, altsep) + assert glob_matches_path(build("/a/b/c/d"), "/**/d", sep, altsep) + assert glob_matches_path(build("/a/b/c/d.py"), "/**/*.py", sep, altsep) + assert glob_matches_path(build("/a/b/c/d.py"), "**/c/*.py", sep, altsep) + + if sys.platform == "win32": + assert glob_matches_path(build("/a/b/c/d.py"), "**/C/*.py", sep, altsep) + assert glob_matches_path(build("/a/b/C/d.py"), "**/c/*.py", sep, altsep) + + # Expected not to match. + assert not glob_matches_path(build("/a/b/c/d"), "/**/d.py", sep, altsep) + assert not glob_matches_path(build("/a/b/c/d.pyx"), "/a/**/c/*.py", sep, altsep) + assert not glob_matches_path(build("/a/b/c/d"), "/*/d", sep, altsep) + + if sep == "/": + assert not glob_matches_path( + build("/a/b/c/d"), r"**\d", sep, altsep + ) # Match with \ doesn't work on linux... + assert not glob_matches_path( + build("/a/b/c/d"), r"c:\**\d", sep, altsep + ) # Match with drive doesn't work on linux... + else: + # Works in Windows. + assert glob_matches_path(build("/a/b/c/d"), r"**\d", sep, altsep) + assert glob_matches_path(build("/a/b/c/d"), r"c:\**\d", sep, altsep) + + # Corner cases + assert not glob_matches_path(build("/"), r"", sep, altsep) + assert glob_matches_path(build(""), r"", sep, altsep) + assert not glob_matches_path(build(""), r"**", sep, altsep) + assert glob_matches_path(build("/"), r"**", sep, altsep) + assert glob_matches_path(build("/"), r"*", sep, altsep) + + +def test_create_accept_directory_callable(): + from robocorp_ls_core.load_ignored_dirs import create_accept_directory_callable + + accept_directory = create_accept_directory_callable("") + assert not accept_directory("/my/node_modules") + assert accept_directory("/my") diff --git a/robotframework-ls/docs/faq.md b/robotframework-ls/docs/faq.md index 7b679b8390..f6c462a259 100644 --- a/robotframework-ls/docs/faq.md +++ b/robotframework-ls/docs/faq.md @@ -159,6 +159,14 @@ picks up the new environment variable value. **Note**: when possible using `watchdog` is recommended. +**Note**: when using `fsnotify` mode, it's possible to specify directories to be ignored with an environment variable +`ROBOTFRAMEWORK_LS_IGNORE_DIRS` which points to a json list with glob-patterns to ignore. + + i.e.: `ROBOTFRAMEWORK_LS_IGNORE_DIRS=["**/bin", "**/other/project"]` + + +**Note**: The following patterns are always ignored: `["**/.git", "**/__pycache__", "**/.idea", "**/node_modules", "**/.metadata", "**/.vscode"]` + How to solve (NO_ROBOT) too old for linting? --------------------------------------------