Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit e4fa33b

Browse files
committed May 23, 2022
Add copy-on-write support to shutil
1 parent 985159a commit e4fa33b

File tree

5 files changed

+144
-35
lines changed

5 files changed

+144
-35
lines changed
 

‎Doc/library/shutil.rst

+27-3
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ Directory and files operations
4848
be copied.
4949

5050

51-
.. function:: copyfile(src, dst, *, follow_symlinks=True)
51+
.. function:: copyfile(src, dst, *, follow_symlinks=True, allow_reflink=True)
5252

5353
Copy the contents (no metadata) of the file named *src* to a file named
5454
*dst* and return *dst* in the most efficient way possible.
@@ -67,6 +67,10 @@ Directory and files operations
6767
a new symbolic link will be created instead of copying the
6868
file *src* points to.
6969

70+
*allow_reflink* enables copy-on-write on supported Linux filesystems.
71+
The :c:func:`copy_file_range` system call is used internally when
72+
*allow_reflink* is true.
73+
7074
.. audit-event:: shutil.copyfile src,dst shutil.copyfile
7175

7276
.. versionchanged:: 3.3
@@ -83,6 +87,10 @@ Directory and files operations
8387
copy the file more efficiently. See
8488
:ref:`shutil-platform-dependent-efficient-copy-operations` section.
8589

90+
.. versionchanged:: 3.12
91+
Added *allow_reflink* argument. Copy-on-write is enabled by default on
92+
supported Linux filesystems.
93+
8694
.. exception:: SameFileError
8795

8896
This exception is raised if source and destination in :func:`copyfile`
@@ -155,7 +163,7 @@ Directory and files operations
155163
.. versionchanged:: 3.3
156164
Added *follow_symlinks* argument and support for Linux extended attributes.
157165

158-
.. function:: copy(src, dst, *, follow_symlinks=True)
166+
.. function:: copy(src, dst, *, follow_symlinks=True, allow_reflink=True)
159167

160168
Copies the file *src* to the file or directory *dst*. *src* and *dst*
161169
should be :term:`path-like objects <path-like object>` or strings. If
@@ -168,6 +176,10 @@ Directory and files operations
168176
is true and *src* is a symbolic link, *dst* will be a copy of
169177
the file *src* refers to.
170178

179+
*allow_reflink* enables copy-on-write on supported Linux filesystems.
180+
The :c:func:`copy_file_range` system call is used internally when
181+
*allow_reflink* is true.
182+
171183
:func:`~shutil.copy` copies the file data and the file's permission
172184
mode (see :func:`os.chmod`). Other metadata, like the
173185
file's creation and modification times, is not preserved.
@@ -187,7 +199,11 @@ Directory and files operations
187199
copy the file more efficiently. See
188200
:ref:`shutil-platform-dependent-efficient-copy-operations` section.
189201

190-
.. function:: copy2(src, dst, *, follow_symlinks=True)
202+
.. versionchanged:: 3.12
203+
Added *allow_reflink* argument. Copy-on-write is enabled by default on
204+
supported Linux filesystems.
205+
206+
.. function:: copy2(src, dst, *, follow_symlinks=True, allow_reflink=True)
191207

192208
Identical to :func:`~shutil.copy` except that :func:`copy2`
193209
also attempts to preserve file metadata.
@@ -201,6 +217,10 @@ Directory and files operations
201217
it can; :func:`copy2` never raises an exception because it
202218
cannot preserve file metadata.
203219

220+
*allow_reflink* enables copy-on-write on supported Linux filesystems.
221+
The :c:func:`copy_file_range` system call is used internally when
222+
*allow_reflink* is true.
223+
204224
:func:`copy2` uses :func:`copystat` to copy the file metadata.
205225
Please see :func:`copystat` for more information
206226
about platform support for modifying symbolic link metadata.
@@ -219,6 +239,10 @@ Directory and files operations
219239
copy the file more efficiently. See
220240
:ref:`shutil-platform-dependent-efficient-copy-operations` section.
221241

242+
.. versionchanged:: 3.12
243+
Added *allow_reflink* argument. Copy-on-write is enabled by default on
244+
supported Linux filesystems.
245+
222246
.. function:: ignore_patterns(*patterns)
223247

224248
This factory function creates a function that can be used as a callable for

‎Lib/shutil.py

+61-11
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
# This should never be removed, see rationale in:
4444
# https://bugs.python.org/issue43743#msg393429
4545
_USE_CP_SENDFILE = hasattr(os, "sendfile") and sys.platform.startswith("linux")
46+
_USE_CP_COPY_FILE_RANGE = hasattr(os, "copy_file_range")
4647
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS
4748

4849
# CMD defaults in Windows 10
@@ -122,6 +123,47 @@ def _determine_linux_fastcopy_blocksize(infd):
122123
blocksize = min(blocksize, 2 ** 30)
123124
return blocksize
124125

126+
def _fastcopy_copy_file_range(fsrc, fdst):
127+
"""Copy data from one regular mmap-like fd to another by using
128+
a high-performance copy_file_range(2) syscall that gives filesystems
129+
an opportunity to implement the use of reflinks or server-side copy.
130+
131+
This should work on Linux >= 4.5 only.
132+
"""
133+
try:
134+
infd = fsrc.fileno()
135+
outfd = fdst.fileno()
136+
except Exception as err:
137+
raise _GiveupOnFastCopy(err) # not a regular file
138+
139+
blocksize = _determine_linux_fastcopy_blocksize(infd)
140+
offset = 0
141+
while True:
142+
try:
143+
n_copied = os.copy_file_range(infd, outfd, blocksize, offset_dst=offset)
144+
except OSError as err:
145+
# ...in order to have a more informative exception.
146+
err.filename = fsrc.name
147+
err.filename2 = fdst.name
148+
149+
if err.errno == errno.ENOSPC: # filesystem is full
150+
raise err from None
151+
152+
# Give up on first call and if no data was copied.
153+
if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
154+
raise _GiveupOnFastCopy(err)
155+
156+
raise err
157+
else:
158+
if n_copied == 0:
159+
# If no bytes have been copied yet, copy_file_range
160+
# might silently fail.
161+
# https://lore.kernel.org/linux-fsdevel/20210126233840.GG4626@dread.disaster.area/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
162+
if offset == 0:
163+
raise _GiveupOnFastCopy()
164+
break
165+
offset += n_copied
166+
125167
def _fastcopy_sendfile(fsrc, fdst):
126168
"""Copy data from one regular mmap-like fd to another by using
127169
high-performance sendfile(2) syscall.
@@ -230,7 +272,7 @@ def _stat(fn):
230272
def _islink(fn):
231273
return fn.is_symlink() if isinstance(fn, os.DirEntry) else os.path.islink(fn)
232274

233-
def copyfile(src, dst, *, follow_symlinks=True):
275+
def copyfile(src, dst, *, follow_symlinks=True, allow_reflink=True):
234276
"""Copy data from src to dst in the most efficient way possible.
235277
236278
If follow_symlinks is not set and src is a symbolic link, a new
@@ -271,12 +313,20 @@ def copyfile(src, dst, *, follow_symlinks=True):
271313
except _GiveupOnFastCopy:
272314
pass
273315
# Linux
274-
elif _USE_CP_SENDFILE:
275-
try:
276-
_fastcopy_sendfile(fsrc, fdst)
277-
return dst
278-
except _GiveupOnFastCopy:
279-
pass
316+
elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE:
317+
# reflink may be implicit in copy_file_range.
318+
if _USE_CP_COPY_FILE_RANGE and allow_reflink:
319+
try:
320+
_fastcopy_copy_file_range(fsrc, fdst)
321+
return dst
322+
except _GiveupOnFastCopy:
323+
pass
324+
if _USE_CP_SENDFILE:
325+
try:
326+
_fastcopy_sendfile(fsrc, fdst)
327+
return dst
328+
except _GiveupOnFastCopy:
329+
pass
280330
# Windows, see:
281331
# https://github.com/python/cpython/pull/7160#discussion_r195405230
282332
elif _WINDOWS and file_size > 0:
@@ -409,7 +459,7 @@ def lookup(name):
409459
else:
410460
raise
411461

412-
def copy(src, dst, *, follow_symlinks=True):
462+
def copy(src, dst, *, follow_symlinks=True, allow_reflink=True):
413463
"""Copy data and mode bits ("cp src dst"). Return the file's destination.
414464
415465
The destination may be a directory.
@@ -423,11 +473,11 @@ def copy(src, dst, *, follow_symlinks=True):
423473
"""
424474
if os.path.isdir(dst):
425475
dst = os.path.join(dst, os.path.basename(src))
426-
copyfile(src, dst, follow_symlinks=follow_symlinks)
476+
copyfile(src, dst, follow_symlinks=follow_symlinks, allow_reflink=allow_reflink)
427477
copymode(src, dst, follow_symlinks=follow_symlinks)
428478
return dst
429479

430-
def copy2(src, dst, *, follow_symlinks=True):
480+
def copy2(src, dst, *, follow_symlinks=True, allow_reflink=True):
431481
"""Copy data and metadata. Return the file's destination.
432482
433483
Metadata is copied with copystat(). Please see the copystat function
@@ -440,7 +490,7 @@ def copy2(src, dst, *, follow_symlinks=True):
440490
"""
441491
if os.path.isdir(dst):
442492
dst = os.path.join(dst, os.path.basename(src))
443-
copyfile(src, dst, follow_symlinks=follow_symlinks)
493+
copyfile(src, dst, follow_symlinks=follow_symlinks, allow_reflink=allow_reflink)
444494
copystat(src, dst, follow_symlinks=follow_symlinks)
445495
return dst
446496

‎Lib/test/test_shutil.py

+50-21
Original file line numberDiff line numberDiff line change
@@ -2467,12 +2467,8 @@ def test_filesystem_full(self):
24672467
self.assertRaises(OSError, self.zerocopy_fun, src, dst)
24682468

24692469

2470-
@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
2471-
class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
2472-
PATCHPOINT = "os.sendfile"
2473-
2474-
def zerocopy_fun(self, fsrc, fdst):
2475-
return shutil._fastcopy_sendfile(fsrc, fdst)
2470+
class _ZeroCopyFileLinuxTest(_ZeroCopyFileTest):
2471+
BLOCKSIZE_INDEX = None
24762472

24772473
def test_non_regular_file_src(self):
24782474
with io.BytesIO(self.FILEDATA) as src:
@@ -2493,65 +2489,65 @@ def test_non_regular_file_dst(self):
24932489
self.assertEqual(dst.read(), self.FILEDATA)
24942490

24952491
def test_exception_on_second_call(self):
2496-
def sendfile(*args, **kwargs):
2492+
def syscall(*args, **kwargs):
24972493
if not flag:
24982494
flag.append(None)
2499-
return orig_sendfile(*args, **kwargs)
2495+
return orig_syscall(*args, **kwargs)
25002496
else:
25012497
raise OSError(errno.EBADF, "yo")
25022498

25032499
flag = []
2504-
orig_sendfile = os.sendfile
2505-
with unittest.mock.patch('os.sendfile', create=True,
2506-
side_effect=sendfile):
2500+
orig_syscall = eval(self.PATCHPOINT)
2501+
with unittest.mock.patch(self.PATCHPOINT, create=True,
2502+
side_effect=syscall):
25072503
with self.get_files() as (src, dst):
25082504
with self.assertRaises(OSError) as cm:
2509-
shutil._fastcopy_sendfile(src, dst)
2505+
self.zerocopy_fun(src, dst)
25102506
assert flag
25112507
self.assertEqual(cm.exception.errno, errno.EBADF)
25122508

25132509
def test_cant_get_size(self):
25142510
# Emulate a case where src file size cannot be determined.
25152511
# Internally bufsize will be set to a small value and
2516-
# sendfile() will be called repeatedly.
2512+
# a system call will be called repeatedly.
25172513
with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
25182514
with self.get_files() as (src, dst):
2519-
shutil._fastcopy_sendfile(src, dst)
2515+
self.zerocopy_fun(src, dst)
25202516
assert m.called
25212517
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
25222518

25232519
def test_small_chunks(self):
25242520
# Force internal file size detection to be smaller than the
2525-
# actual file size. We want to force sendfile() to be called
2521+
# actual file size. We want to force a system call to be called
25262522
# multiple times, also in order to emulate a src fd which gets
25272523
# bigger while it is being copied.
25282524
mock = unittest.mock.Mock()
25292525
mock.st_size = 65536 + 1
25302526
with unittest.mock.patch('os.fstat', return_value=mock) as m:
25312527
with self.get_files() as (src, dst):
2532-
shutil._fastcopy_sendfile(src, dst)
2528+
self.zerocopy_fun(src, dst)
25332529
assert m.called
25342530
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
25352531

25362532
def test_big_chunk(self):
25372533
# Force internal file size detection to be +100MB bigger than
2538-
# the actual file size. Make sure sendfile() does not rely on
2534+
# the actual file size. Make sure a system call does not rely on
25392535
# file size value except for (maybe) a better throughput /
25402536
# performance.
25412537
mock = unittest.mock.Mock()
25422538
mock.st_size = self.FILESIZE + (100 * 1024 * 1024)
25432539
with unittest.mock.patch('os.fstat', return_value=mock) as m:
25442540
with self.get_files() as (src, dst):
2545-
shutil._fastcopy_sendfile(src, dst)
2541+
self.zerocopy_fun(src, dst)
25462542
assert m.called
25472543
self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)
25482544

25492545
def test_blocksize_arg(self):
2550-
with unittest.mock.patch('os.sendfile',
2546+
with unittest.mock.patch(self.PATCHPOINT,
25512547
side_effect=ZeroDivisionError) as m:
25522548
self.assertRaises(ZeroDivisionError,
25532549
shutil.copyfile, TESTFN, TESTFN2)
2554-
blocksize = m.call_args[0][3]
2550+
blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
25552551
# Make sure file size and the block size arg passed to
25562552
# the system call are the same.
25572553
self.assertEqual(blocksize, os.path.getsize(TESTFN))
@@ -2561,9 +2557,19 @@ def test_blocksize_arg(self):
25612557
self.addCleanup(os_helper.unlink, TESTFN2 + '3')
25622558
self.assertRaises(ZeroDivisionError,
25632559
shutil.copyfile, TESTFN2, TESTFN2 + '3')
2564-
blocksize = m.call_args[0][3]
2560+
blocksize = m.call_args[0][self.BLOCKSIZE_INDEX]
25652561
self.assertEqual(blocksize, 2 ** 23)
25662562

2563+
2564+
@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
2565+
@unittest.mock.patch.object(shutil, "_USE_CP_COPY_FILE_RANGE", False)
2566+
class TestZeroCopySendfile(_ZeroCopyFileLinuxTest, unittest.TestCase):
2567+
PATCHPOINT = "os.sendfile"
2568+
BLOCKSIZE_INDEX = 3
2569+
2570+
def zerocopy_fun(self, fsrc, fdst):
2571+
return shutil._fastcopy_sendfile(fsrc, fdst)
2572+
25672573
def test_file2file_not_supported(self):
25682574
# Emulate a case where sendfile() only support file->socket
25692575
# fds. In such a case copyfile() is supposed to skip the
@@ -2586,6 +2592,29 @@ def test_file2file_not_supported(self):
25862592
shutil._USE_CP_SENDFILE = True
25872593

25882594

2595+
@unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported")
2596+
class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase):
2597+
PATCHPOINT = "os.copy_file_range"
2598+
BLOCKSIZE_INDEX = 2
2599+
2600+
def zerocopy_fun(self, fsrc, fdst):
2601+
return shutil._fastcopy_copy_file_range(fsrc, fdst)
2602+
2603+
def test_empty_file(self):
2604+
srcname = f"{TESTFN}src"
2605+
dstname = f"{TESTFN}dst"
2606+
self.addCleanup(lambda: os_helper.unlink(srcname))
2607+
self.addCleanup(lambda: os_helper.unlink(dstname))
2608+
with open(srcname, "wb"):
2609+
pass
2610+
2611+
with open(srcname, "rb") as src, open(dstname, "wb") as dst:
2612+
# _fastcopy_copy_file_range gives up copying empty files due
2613+
# to a bug in older Linux.
2614+
with self.assertRaises(shutil._GiveupOnFastCopy):
2615+
self.zerocopy_fun(src, dst)
2616+
2617+
25892618
@unittest.skipIf(not MACOS, 'macOS only')
25902619
class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase):
25912620
PATCHPOINT = "posix._fcopyfile"

‎Misc/ACKS

+1
Original file line numberDiff line numberDiff line change
@@ -1878,6 +1878,7 @@ Johannes Vogel
18781878
Michael Vogt
18791879
Radu Voicilas
18801880
Alex Volkov
1881+
Illia Volochii
18811882
Ruben Vorderman
18821883
Guido Vranken
18831884
Martijn Vries
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Use the :c:func:`copy_file_range` Linux system call in :func:`shutil.copy`,
2+
:func:`shutil.copy2`, and :func:`shutil.copyfile` functions by default. The
3+
system call gives filesystems an opportunity to implement the use of
4+
copy-on-write or server-side copy. The functions have a new *allow_reflink*
5+
argument to control the functionality. Patch by Illia Volochii.

0 commit comments

Comments
 (0)
Please sign in to comment.