Skip to content

Commit 25807f2

Browse files
committed
dvc: make flufl.lock opt-in and use zc.lockfile
As it turned out (see issue numbers down below), we can't really take hardlinks for granted, so `flufl.lock` is not a panacea for all filesystems. Considering that the vast majority of filesystems that our users use support `zc.lockfile`(flock-based) and it has benefits like more reliable mechanism, auto-delete when process exits, more sturdy implementation, etc, it makes more sense to bring it back and use by default again. For filesystems that don't support `flock()`, users will be able to manually enable `flufl.lock` use through the config option. It would be ideal if we could auto-detect that flock is not supported, but in the real world, it turned out to be non-trivial, as it might hang forever in a kernel context, which makes the implementation way too complex for our purposes. So what we're doing instead is showing a message before locking with `zc.lockfile` that, under normal circumstances will disappear once the lock is taken or failed, otherwise it will point users to the related documentation where they can learn about how to opt-in for `flufl.lock`. Fixes #2831 Fixes #2897 Related #2860
1 parent fd62ba8 commit 25807f2

File tree

8 files changed

+102
-68
lines changed

8 files changed

+102
-68
lines changed

dvc/command/base.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
import logging
44

5-
import colorama
6-
75

86
logger = logging.getLogger(__name__)
97

@@ -24,15 +22,13 @@ def fix_subparsers(subparsers):
2422

2523

2624
def append_doc_link(help_message, path):
25+
from dvc.utils import format_link
26+
2727
if not path:
2828
return help_message
2929
doc_base = "https://man.dvc.org/"
30-
return "{message}\nDocumentation: <{blue}{base}{path}{nc}>".format(
31-
message=help_message,
32-
base=doc_base,
33-
path=path,
34-
blue=colorama.Fore.CYAN,
35-
nc=colorama.Fore.RESET,
30+
return "{message}\nDocumentation: {link}".format(
31+
message=help_message, link=format_link(doc_base + path)
3632
)
3733

3834

@@ -44,7 +40,8 @@ def __init__(self, args):
4440
self.repo = Repo()
4541
self.config = self.repo.config
4642
self.args = args
47-
updater = Updater(self.repo.dvc_dir)
43+
hardlink_lock = self.config.config["core"].get("hardlink_lock", False)
44+
updater = Updater(self.repo.dvc_dir, hardlink_lock=hardlink_lock)
4845
updater.check()
4946

5047
@property

dvc/command/daemon.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,16 @@ class CmdDaemonUpdater(CmdDaemonBase):
1212
def run(self):
1313
import os
1414
from dvc.repo import Repo
15+
from dvc.config import Config
1516
from dvc.updater import Updater
1617

1718
root_dir = Repo.find_root()
1819
dvc_dir = os.path.join(root_dir, Repo.DVC_DIR)
19-
updater = Updater(dvc_dir)
20+
config = Config(dvc_dir, verify=False)
21+
hardlink_lock = config.config.get("core", {}).get(
22+
"hardlink_lock", False
23+
)
24+
updater = Updater(dvc_dir, hardlink_lock=hardlink_lock)
2025
updater.fetch(detach=False)
2126

2227
return 0

dvc/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ class Config(object): # pylint: disable=too-many-instance-attributes
125125
SECTION_CORE_INTERACTIVE = "interactive"
126126
SECTION_CORE_ANALYTICS = "analytics"
127127
SECTION_CORE_CHECKSUM_JOBS = "checksum_jobs"
128+
SECTION_CORE_HARDLINK_LOCK = "hardlink_lock"
128129

129130
SECTION_CACHE = "cache"
130131
SECTION_CACHE_DIR = "dir"
@@ -160,6 +161,7 @@ class Config(object): # pylint: disable=too-many-instance-attributes
160161
Optional(SECTION_CORE_INTERACTIVE, default=False): Bool,
161162
Optional(SECTION_CORE_ANALYTICS, default=True): Bool,
162163
SECTION_CORE_CHECKSUM_JOBS: All(Coerce(int), Range(1)),
164+
Optional(SECTION_CORE_HARDLINK_LOCK, default=False): Bool,
163165
}
164166

165167
# aws specific options

dvc/lock.py

Lines changed: 66 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
import time
77
from datetime import timedelta
88

9+
import zc.lockfile
910
from funcy.py3 import lkeep
1011

1112
from dvc.exceptions import DvcException
12-
from dvc.utils import makedirs
13-
from dvc.utils.compat import is_py3
14-
13+
from dvc.utils import makedirs, format_link
14+
from dvc.utils.compat import is_py3, is_py2
15+
from dvc.progress import Tqdm
1516

1617
DEFAULT_TIMEOUT = 5
1718

@@ -26,10 +27,60 @@ class LockError(DvcException):
2627
"""Thrown when unable to acquire the lock for dvc repo."""
2728

2829

30+
class Lock(object):
31+
"""Class for dvc repo lock.
32+
33+
Uses zc.lockfile as backend.
34+
"""
35+
36+
def __init__(self, lockfile, friendly=False, **kwargs):
37+
self._friendly = friendly
38+
self.lockfile = lockfile
39+
self._lock = None
40+
41+
@property
42+
def files(self):
43+
return [self.lockfile]
44+
45+
def _do_lock(self):
46+
try:
47+
with Tqdm(
48+
bar_format="{desc}",
49+
disable=not self._friendly,
50+
desc=(
51+
"If DVC froze, see `hardlink_lock` in {}".format(
52+
format_link("man.dvc.org/config#core")
53+
)
54+
),
55+
):
56+
self._lock = zc.lockfile.LockFile(self.lockfile)
57+
except zc.lockfile.LockError:
58+
raise LockError(FAILED_TO_LOCK_MESSAGE)
59+
60+
def lock(self):
61+
try:
62+
self._do_lock()
63+
return
64+
except LockError:
65+
time.sleep(DEFAULT_TIMEOUT)
66+
67+
self._do_lock()
68+
69+
def unlock(self):
70+
self._lock.close()
71+
self._lock = None
72+
73+
def __enter__(self):
74+
self.lock()
75+
76+
def __exit__(self, typ, value, tbck):
77+
self.unlock()
78+
79+
2980
if is_py3:
3081
import flufl.lock
3182

32-
class Lock(flufl.lock.Lock):
83+
class HardlinkLock(flufl.lock.Lock):
3384
"""Class for dvc repo lock.
3485
3586
Args:
@@ -38,7 +89,7 @@ class Lock(flufl.lock.Lock):
3889
tmp_dir (str): a directory to store claim files.
3990
"""
4091

41-
def __init__(self, lockfile, tmp_dir=None):
92+
def __init__(self, lockfile, tmp_dir=None, **kwargs):
4293
import socket
4394

4495
self._tmp_dir = tmp_dir
@@ -101,44 +152,14 @@ def __del__(self):
101152
pass
102153

103154

104-
else:
105-
import zc.lockfile
106-
107-
class Lock(object):
108-
"""Class for dvc repo lock.
109-
110-
Uses zc.lockfile as backend.
111-
"""
112-
113-
def __init__(self, lockfile, tmp_dir=None):
114-
self.lockfile = lockfile
115-
self._lock = None
116-
117-
@property
118-
def files(self):
119-
return [self.lockfile]
120-
121-
def _do_lock(self):
122-
try:
123-
self._lock = zc.lockfile.LockFile(self.lockfile)
124-
except zc.lockfile.LockError:
125-
raise LockError(FAILED_TO_LOCK_MESSAGE)
126-
127-
def lock(self):
128-
try:
129-
self._do_lock()
130-
return
131-
except LockError:
132-
time.sleep(DEFAULT_TIMEOUT)
133-
134-
self._do_lock()
135-
136-
def unlock(self):
137-
self._lock.close()
138-
self._lock = None
139-
140-
def __enter__(self):
141-
self.lock()
155+
def make_lock(lockfile, tmp_dir=None, friendly=False, hardlink_lock=False):
156+
if hardlink_lock and is_py2:
157+
raise DvcException(
158+
"Hardlink locks are not supported on Python <3.5. "
159+
"See `hardlink_lock` in {}".format(
160+
format_link("man.dvc.org/config#core")
161+
)
162+
)
142163

143-
def __exit__(self, typ, value, tbck):
144-
self.unlock()
164+
cls = HardlinkLock if hardlink_lock else Lock
165+
return cls(lockfile, tmp_dir=tmp_dir, friendly=friendly)

dvc/repo/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ class Repo(object):
6969

7070
def __init__(self, root_dir=None):
7171
from dvc.state import State
72-
from dvc.lock import Lock
72+
from dvc.lock import make_lock
7373
from dvc.scm import SCM
7474
from dvc.cache import Cache
7575
from dvc.data_cloud import DataCloud
@@ -88,9 +88,12 @@ def __init__(self, root_dir=None):
8888

8989
self.tree = WorkingTree(self.root_dir)
9090

91-
self.lock = Lock(
91+
hardlink_lock = self.config.config["core"].get("hardlink_lock", False)
92+
self.lock = make_lock(
9293
os.path.join(self.dvc_dir, "lock"),
9394
tmp_dir=os.path.join(self.dvc_dir, "tmp"),
95+
hardlink_lock=hardlink_lock,
96+
friendly=True,
9497
)
9598
# NOTE: storing state and link_state in the repository itself to avoid
9699
# any possible state corruption in 'shared cache dir' scenario.

dvc/updater.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99
from packaging import version
1010

1111
from dvc import __version__
12-
from dvc.lock import Lock
13-
from dvc.lock import LockError
12+
from dvc.lock import make_lock, LockError
1413
from dvc.utils import boxify
1514
from dvc.utils import env2bool
1615
from dvc.utils.pkg import PKG
@@ -24,11 +23,14 @@ class Updater(object): # pragma: no cover
2423
TIMEOUT = 24 * 60 * 60 # every day
2524
TIMEOUT_GET = 10
2625

27-
def __init__(self, dvc_dir):
26+
def __init__(self, dvc_dir, friendly=False, hardlink_lock=False):
2827
self.dvc_dir = dvc_dir
2928
self.updater_file = os.path.join(dvc_dir, self.UPDATER_FILE)
30-
self.lock = Lock(
31-
self.updater_file + ".lock", tmp_dir=os.path.join(dvc_dir, "tmp")
29+
self.lock = make_lock(
30+
self.updater_file + ".lock",
31+
tmp_dir=os.path.join(dvc_dir, "tmp"),
32+
friendly=friendly,
33+
hardlink_lock=hardlink_lock,
3234
)
3335
self.current = version.parse(__version__).base_version
3436

dvc/utils/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,3 +420,11 @@ def resolve_output(inp, out):
420420
if os.path.isdir(out):
421421
return os.path.join(out, name)
422422
return out
423+
424+
425+
def format_link(link):
426+
import colorama
427+
428+
return "<{blue}{link}{nc}>".format(
429+
blue=colorama.Fore.CYAN, link=link, nc=colorama.Fore.RESET
430+
)

setup.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def run(self):
7979
"shortuuid>=0.5.0",
8080
"tqdm>=4.40.0,<5",
8181
"packaging>=19.0",
82+
"zc.lockfile>=1.2.1",
8283
"win-unicode-console>=0.5; sys_platform == 'win32'",
8384
"pywin32>=225; sys_platform == 'win32'",
8485
]
@@ -163,12 +164,7 @@ def run(self):
163164
"ssh_gssapi": ssh_gssapi,
164165
"hdfs": hdfs,
165166
# NOTE: https://github.com/inveniosoftware/troubleshooting/issues/1
166-
":python_version=='2.7'": [
167-
"futures",
168-
"pathlib2",
169-
"contextlib2",
170-
"zc.lockfile>=1.2.1",
171-
],
167+
":python_version=='2.7'": ["futures", "pathlib2", "contextlib2"],
172168
":python_version>='3.0'": ["flufl.lock>=3.2"],
173169
"tests": tests_requirements,
174170
},

0 commit comments

Comments
 (0)