Skip to content

Commit

Permalink
hashfile/checkout: use save_many to save state
Browse files Browse the repository at this point in the history
For the MNIST dataset, this drops the total runtime of `dvc add` from 24s
to 12s for me.
  • Loading branch information
skshetry committed Aug 7, 2024
1 parent 3b53bd2 commit f1d9ae1
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions src/dvc_data/hashfile/checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING, Optional

from dvc_objects.fs.generic import test_links, transfer
from dvc_objects.fs.local import LocalFileSystem
from fsspec.callbacks import DEFAULT_CALLBACK

from .build import build
Expand All @@ -13,6 +14,7 @@
from fsspec import Callback

from ._ignore import Ignore
from .hash_info import HashInfo

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -110,10 +112,6 @@ def _checkout_file(
else:
link(cache, cache_path, fs, path)
modified = True

if state:
state.save(path, fs, change.new.oid)

return modified


Expand Down Expand Up @@ -178,7 +176,7 @@ def __call__(self, cache, from_path, to_fs, to_path):
raise LinkError(to_path) from exc


def _checkout(
def _checkout( # noqa: C901
diff,
path,
fs,
Expand All @@ -203,6 +201,8 @@ def _checkout(
_remove(entry_path, fs, change.old.in_cache, force=force, prompt=prompt)

failed = []
hashes_to_update: list[tuple[str, HashInfo, None]] = []
is_local_fs = isinstance(fs, LocalFileSystem)
for change in chain(diff.added, diff.modified):
entry_path = fs.join(path, *change.new.key) if change.new.key != ROOT else path
if change.new.oid.isdir:
Expand All @@ -223,6 +223,14 @@ def _checkout(
)
except CheckoutError as exc:
failed.extend(exc.paths)
else:
if is_local_fs:
hashes_to_update.append(
(entry_path, change.new.oid, fs.info(entry_path))
)

if state is not None:
state.save_many(hashes_to_update, fs)

if failed:
raise CheckoutError(failed)
Expand Down

0 comments on commit f1d9ae1

Please sign in to comment.