From 6f8ac0c1b1dc61910a9d9a0e84a445e2759cf47d Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 17 Jan 2020 18:22:57 +0200 Subject: [PATCH 1/4] logger: support {} formatting --- dvc/logger.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dvc/logger.py b/dvc/logger.py index 3afb0f647b..77962fe9e5 100644 --- a/dvc/logger.py +++ b/dvc/logger.py @@ -58,8 +58,10 @@ class ColorFormatter(logging.Formatter): } def format(self, record): + msg = record.msg.format(*record.args) if record.args else record.msg + if record.levelname == "INFO": - return record.msg + return msg if record.levelname == "ERROR" or record.levelname == "CRITICAL": exception, stack_trace = self._parse_exc(record) @@ -70,8 +72,8 @@ def format(self, record): color=self.color_code.get(record.levelname, ""), nc=colorama.Fore.RESET, levelname=record.levelname, - description=self._description(record.msg, exception), - msg=record.msg, + description=self._description(msg, exception), + msg=msg, stack_trace=stack_trace, ) @@ -79,7 +81,7 @@ def format(self, record): color=self.color_code.get(record.levelname, ""), nc=colorama.Fore.RESET, levelname=record.levelname, - msg=record.msg, + msg=msg, ) def _current_level(self): From c2dae4b49cc524053902bcac9e826e612c0787a7 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 17 Jan 2020 11:14:09 +0200 Subject: [PATCH 2/4] logger: remote: use lazy formatting Part of #1843 As our previous investigations showed, path stringification and stuff like relpath are taking a very large chunk of time when working with giant directories. This patch removes `format()`s and uses lazy formatting provided by logger instead, so stuff like path_info is not stringified until actually needed (e.g. on `-v`). --- dvc/remote/base.py | 80 +++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 9f47779c8f..4b6a8a5aa1 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -24,7 +24,7 @@ from dvc.progress import Tqdm from dvc.remote.slow_link_detection import slow_link_guard from dvc.state import StateNoop -from dvc.utils import relpath, tmp_fname +from dvc.utils import tmp_fname from dvc.utils.fs import move, makedirs from dvc.utils.http import open_url @@ -275,8 +275,10 @@ def load_dir_cache(self, checksum): raise DirCacheError(checksum) from exc if not isinstance(d, list): - msg = "dir cache file format error '{}' [skipping the file]" - logger.error(msg.format(relpath(path_info))) + logger.error( + "dir cache file format error '{}' [skipping the file]", + path_info, + ) return [] for info in d: @@ -342,36 +344,35 @@ def changed(self, path_info, checksum_info): """ logger.debug( - "checking if '{}'('{}') has changed.".format( - path_info, checksum_info - ) + "checking if '{}'('{}') has changed.", path_info, checksum_info ) if not self.exists(path_info): - logger.debug("'{}' doesn't exist.".format(path_info)) + logger.debug("'{}' doesn't exist.", path_info) return True checksum = checksum_info.get(self.PARAM_CHECKSUM) if checksum is None: - logger.debug("checksum for '{}' is missing.".format(path_info)) + logger.debug("checksum for '{}' is missing.", path_info) return True if self.changed_cache(checksum): logger.debug( - "cache for '{}'('{}') has changed.".format(path_info, checksum) + "cache for '{}'('{}') has changed.", path_info, checksum ) return True actual = self.get_checksum(path_info) if checksum != actual: logger.debug( - "checksum '{}'(actual '{}') for '{}' has changed.".format( - checksum, actual, path_info - ) + "checksum '{}'(actual '{}') for '{}' has changed.", + checksum, + actual, + path_info, ) return True - logger.debug("'{}' hasn't changed.".format(path_info)) + logger.debug("'{}' hasn't changed.", path_info) return False def link(self, from_info, to_info): @@ -394,8 +395,9 @@ def _try_links(self, from_info, to_info, link_types): return except DvcException as exc: - msg = "Cache type '{}' is not supported: {}" - logger.debug(msg.format(link_types[0], str(exc))) + logger.debug( + "Cache type '{}' is not supported: {}", link_types[0], exc + ) del link_types[0] raise DvcException("no possible cache types left to try out.") @@ -409,13 +411,13 @@ def _do_link(self, from_info, to_info, link_method): if self.protected: self.protect(to_info) - msg = "Created {}'{}': {} -> {}".format( + logger.debug( + "Created {}'{}': {} -> {}", "protected " if self.protected else "", self.cache_types[0], from_info, to_info, ) - logger.debug(msg) def _save_file(self, path_info, checksum, save_link=True): assert checksum @@ -517,7 +519,7 @@ def save(self, path_info, checksum_info): def _save(self, path_info, checksum): to_info = self.checksum_to_path_info(checksum) - logger.debug("Saving '{}' to '{}'.".format(path_info, to_info)) + logger.debug("Saving '{}' to '{}'.", path_info, to_info) if self.isdir(path_info): self._save_dir(path_info, checksum) return @@ -529,8 +531,9 @@ def _handle_transfer_exception( if isinstance(exception, OSError) and exception.errno == errno.EMFILE: raise exception - msg = "failed to {} '{}' to '{}'".format(operation, from_info, to_info) - logger.exception(msg) + logger.exception( + "failed to {} '{}' to '{}'", operation, from_info, to_info + ) return 1 def upload(self, from_info, to_info, name=None, no_progress_bar=False): @@ -543,7 +546,7 @@ def upload(self, from_info, to_info, name=None, no_progress_bar=False): if from_info.scheme != "local": raise NotImplementedError - logger.debug("Uploading '{}' to '{}'".format(from_info, to_info)) + logger.debug("Uploading '{}' to '{}'", from_info, to_info) name = name or from_info.name @@ -622,7 +625,7 @@ def _download_file( ): makedirs(to_info.parent, exist_ok=True, mode=dir_mode) - logger.debug("Downloading '{}' to '{}'".format(from_info, to_info)) + logger.debug("Downloading '{}' to '{}'", from_info, to_info) name = name or to_info.name tmp_file = tmp_fname(to_info) @@ -728,9 +731,10 @@ def changed_cache_file(self, checksum): actual = self.get_checksum(cache_info) logger.debug( - "cache '{}' expected '{}' actual '{}'".format( - str(cache_info), checksum, actual - ) + "cache '{}' expected '{}' actual '{}'", + cache_info, + checksum, + actual, ) if not checksum or not actual: @@ -740,7 +744,7 @@ def changed_cache_file(self, checksum): return False if self.exists(cache_info): - logger.warning("corrupted cache file '{}'.".format(cache_info)) + logger.warning("corrupted cache file '{}'.", cache_info) self.remove(cache_info) return True @@ -850,8 +854,9 @@ def _checkout_file( """The file is changed we need to checkout a new copy""" cache_info = self.checksum_to_path_info(checksum) if self.exists(path_info): - msg = "data '{}' exists. Removing before checkout." - logger.warning(msg.format(str(path_info))) + logger.warning( + "data '{}' exists. Removing before checkout.", path_info + ) self.safe_remove(path_info, force=force) self.link(cache_info, path_info) @@ -882,7 +887,7 @@ def _checkout_dir( dir_info = self.get_dir_cache(checksum) - logger.debug("Linking directory '{}'.".format(path_info)) + logger.debug("Linking directory '{}'.", path_info) for entry in dir_info: relative_path = entry[self.PARAM_RELPATH] @@ -933,22 +938,24 @@ def checkout( skip = False if not checksum: logger.warning( - "No checksum info found for '{}'. " - "It won't be created.".format(str(path_info)) + "No checksum info found for '{}'. " "It won't be created.", + path_info, ) self.safe_remove(path_info, force=force) failed = path_info elif not relink and not self.changed(path_info, checksum_info): - msg = "Data '{}' didn't change." - logger.debug(msg.format(str(path_info))) + logger.debug("Data '{}' didn't change.", path_info) skip = True elif self.changed_cache( checksum, path_info=path_info, filter_info=filter_info ): - msg = "Cache '{}' not found. File '{}' won't be created." - logger.warning(msg.format(checksum, str(path_info))) + logger.warning( + "Cache '{}' not found. File '{}' won't be created.", + checksum, + path_info, + ) self.safe_remove(path_info, force=force) failed = path_info @@ -962,8 +969,7 @@ def checkout( ) return failed - msg = "Checking out '{}' with cache '{}'." - logger.debug(msg.format(str(path_info), checksum)) + logger.debug("Checking out '{}' with cache '{}'.", path_info, checksum) self._checkout( path_info, checksum, force, progress_callback, relink, filter_info From 65c0c7e60d24cc9191f74cf90f47c84674a0fc8a Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 17 Jan 2020 11:46:58 +0200 Subject: [PATCH 3/4] logger: state: use lazy formatting --- dvc/state.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dvc/state.py b/dvc/state.py index 4b5b0889b6..88e5c81071 100644 --- a/dvc/state.py +++ b/dvc/state.py @@ -141,7 +141,7 @@ def _execute(self, cmd, parameters=()): def _fetchall(self): ret = self.cursor.fetchall() - logger.debug("fetched: {}".format(ret)) + logger.debug("fetched: {}", ret) return ret def _to_sqlite(self, num): @@ -181,11 +181,12 @@ def _prepare_db(self, empty=False): __version__, self.VERSION, version ) elif version < self.VERSION: - msg = ( + logger.warning( "State file version '{}' is too old. " - "Reformatting to the current version '{}'." + "Reformatting to the current version '{}'.", + version, + self.VERSION, ) - logger.warning(msg.format(version, self.VERSION)) cmd = "DROP TABLE IF EXISTS {};" self._execute(cmd.format(self.STATE_TABLE)) self._execute(cmd.format(self.STATE_INFO_TABLE)) @@ -466,7 +467,7 @@ def remove_unused_links(self, used): actual_mtime, _ = get_mtime_and_size(path, self.repo.tree) if inode == actual_inode and mtime == actual_mtime: - logger.debug("Removing '{}' as unused link.".format(path)) + logger.debug("Removing '{}' as unused link.", path) remove(path) unused.append(relpath) From 19acded31f73a7853165a82016bf0f9f6064a9c1 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 17 Jan 2020 11:51:40 +0200 Subject: [PATCH 4/4] logger: utils: fs: use lazy formatting --- dvc/rwlock.py | 2 +- dvc/utils/fs.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dvc/rwlock.py b/dvc/rwlock.py index d0665483c3..d805bcd55a 100644 --- a/dvc/rwlock.py +++ b/dvc/rwlock.py @@ -9,7 +9,7 @@ from .exceptions import DvcException from .lock import LockError -from .utils.fs import relpath +from .utils import relpath INFO_SCHEMA = {"pid": int, "cmd": str} diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 56f3c9975b..aef0552359 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -13,7 +13,6 @@ from dvc.utils import dict_md5 from dvc.utils import fspath from dvc.utils import fspath_py35 -from dvc.utils import relpath logger = logging.getLogger(__name__) @@ -30,7 +29,7 @@ def fs_copy(src, dst): def get_inode(path): inode = System.inode(path) - logger.debug("Path {} inode {}".format(path, inode)) + logger.debug("Path {} inode {}", path, inode) return inode @@ -131,10 +130,9 @@ def _chmod(func, p, excinfo): def remove(path): - path = fspath_py35(path) - - logger.debug("Removing '{}'".format(relpath(path))) + logger.debug("Removing '{}'", path) + path = fspath_py35(path) try: if os.path.isdir(path): shutil.rmtree(path, onerror=_chmod)