Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix lseek(SEEK_DATA/SEEK_HOLE) mmap consistency #12724

Merged
merged 1 commit into from
Nov 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/cmd/mktree/Makefile
tests/zfs-tests/cmd/mmap_exec/Makefile
tests/zfs-tests/cmd/mmap_libaio/Makefile
tests/zfs-tests/cmd/mmap_seek/Makefile
tests/zfs-tests/cmd/mmapwrite/Makefile
tests/zfs-tests/cmd/nvlist_to_lua/Makefile
tests/zfs-tests/cmd/randfree_file/Makefile
Expand Down
18 changes: 18 additions & 0 deletions include/os/freebsd/spl/sys/vnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ enum symfollow { NO_FOLLOW = NOFOLLOW };
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/syscallsubr.h>
#include <sys/vm.h>
#include <vm/vm_object.h>

typedef struct vop_vector vnodeops_t;
#define VOP_FID VOP_VPTOFH
Expand All @@ -83,6 +85,22 @@ vn_is_readonly(vnode_t *vp)
#define vn_has_cached_data(vp) \
((vp)->v_object != NULL && \
(vp)->v_object->resident_page_count > 0)

static __inline void
vn_flush_cached_data(vnode_t *vp, boolean_t sync)
{
#if __FreeBSD_version > 1300054
if (vm_object_mightbedirty(vp->v_object)) {
#else
if (vp->v_object->flags & OBJ_MIGHTBEDIRTY) {
#endif
int flags = sync ? OBJPC_SYNC : 0;
zfs_vmobject_wlock(vp->v_object);
vm_object_page_clean(vp->v_object, 0, 0, flags);
zfs_vmobject_wunlock(vp->v_object);
}
}

#define vn_exists(vp) do { } while (0)
#define vn_invalid(vp) do { } while (0)
#define vn_renamepath(tdvp, svp, tnm, lentnm) do { } while (0)
Expand Down
3 changes: 2 additions & 1 deletion include/os/freebsd/zfs/sys/zfs_znode_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ typedef struct zfs_soft_state {
#define Z_ISLNK(type) ((type) == VLNK)
#define Z_ISDIR(type) ((type) == VDIR)

#define zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp))
#define zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp))
#define zn_flush_cached_data(zp, sync) vn_flush_cached_data(ZTOV(zp), sync)
#define zn_rlimit_fsize(zp, uio) \
vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio))

Expand Down
1 change: 1 addition & 0 deletions include/os/linux/zfs/sys/zfs_znode_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ extern "C" {
#define Z_ISDIR(type) S_ISDIR(type)

#define zn_has_cached_data(zp) ((zp)->z_is_mapped)
#define zn_flush_cached_data(zp, sync) write_inode_now(ZTOI(zp), sync)
#define zn_rlimit_fsize(zp, uio) (0)

/*
Expand Down
1 change: 1 addition & 0 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting);
int dnode_try_claim(objset_t *os, uint64_t object, int slots);
boolean_t dnode_is_dirty(dnode_t *dn);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, void *tag);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
Expand Down
2 changes: 1 addition & 1 deletion man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -1586,7 +1586,7 @@ Allow no-operation writes.
The occurrence of nopwrites will further depend on other pool properties
.Pq i.a. the checksumming and compression algorithms .
.
.It Sy zfs_dmu_offset_next_sync Ns = Ns Sy 0 Ns | ns 1 Pq int
.It Sy zfs_dmu_offset_next_sync Ns = Ns Sy 0 Ns | Ns 1 Pq int
Enable forcing TXG sync to find holes.
When enabled forces ZFS to act like prior versions when
.Sy SEEK_HOLE No or Sy SEEK_DATA
Expand Down
53 changes: 26 additions & 27 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2093,42 +2093,41 @@ int
dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
{
dnode_t *dn;
int i, err;
boolean_t clean = B_TRUE;
int err;

restart:
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);

/*
* Check if dnode is dirty
*/
for (i = 0; i < TXG_SIZE; i++) {
if (multilist_link_active(&dn->dn_dirty_link[i])) {
clean = B_FALSE;
break;
}
}
rw_enter(&dn->dn_struct_rwlock, RW_READER);

/*
* If compatibility option is on, sync any current changes before
* we go trundling through the block pointers.
*/
if (!clean && zfs_dmu_offset_next_sync) {
clean = B_TRUE;
dnode_rele(dn, FTAG);
txg_wait_synced(dmu_objset_pool(os), 0);
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
}
if (dnode_is_dirty(dn)) {
/*
* If the zfs_dmu_offset_next_sync module option is enabled
* then strict hole reporting has been requested. Dirty
* dnodes must be synced to disk to accurately report all
* holes. When disabled (the default) dirty dnodes are
* reported to not have any holes which is always safe.
*
* When called by zfs_holey_common() the zp->z_rangelock
* is held to prevent zfs_write() and mmap writeback from
* re-dirtying the dnode after txg_wait_synced().
*/
if (zfs_dmu_offset_next_sync) {
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
txg_wait_synced(dmu_objset_pool(os), 0);
goto restart;
}

if (clean)
err = dnode_next_offset(dn,
(hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
else
err = SET_ERROR(EBUSY);
} else {
err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK |
(hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
}

rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);

return (err);
Expand Down
20 changes: 20 additions & 0 deletions module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1648,6 +1648,26 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
slots, NULL, NULL));
}

/*
* Checks if the dnode contains any uncommitted dirty records.
*/
boolean_t
dnode_is_dirty(dnode_t *dn)
{
mutex_enter(&dn->dn_mtx);

for (int i = 0; i < TXG_SIZE; i++) {
if (list_head(&dn->dn_dirty_records[i]) != NULL) {
mutex_exit(&dn->dn_mtx);
return (B_TRUE);
}
}

mutex_exit(&dn->dn_mtx);

return (B_FALSE);
}

void
dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
{
Expand Down
9 changes: 8 additions & 1 deletion module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
static int
zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
{
zfs_locked_range_t *lr;
uint64_t noff = (uint64_t)*off; /* new offset */
uint64_t file_sz;
int error;
Expand All @@ -100,12 +101,18 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
else
hole = B_FALSE;

/* Flush any mmap()'d data to disk */
if (zn_has_cached_data(zp))
zn_flush_cached_data(zp, B_FALSE);

lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER);
error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
zfs_rangelock_exit(lr);

if (error == ESRCH)
return (SET_ERROR(ENXIO));

/* file was dirty, so fall back to using generic logic */
/* File was dirty, so fall back to using generic logic */
if (error == EBUSY) {
if (hole)
*off = file_sz;
Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/common.run
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos',
tags = ['functional', 'migration']

[tests/functional/mmap]
tests = ['mmap_write_001_pos', 'mmap_read_001_pos']
tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_seek_001_pos']
tags = ['functional', 'mmap']

[tests/functional/mount]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/cmd/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ SUBDIRS = \
mktree \
mmap_exec \
mmap_libaio \
mmap_seek \
mmapwrite \
nvlist_to_lua \
randwritecomp \
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/cmd/mmap_seek/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/mmap_seek
6 changes: 6 additions & 0 deletions tests/zfs-tests/cmd/mmap_seek/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include $(top_srcdir)/config/Rules.am

pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin

pkgexec_PROGRAMS = mmap_seek
mmap_seek_SOURCES = mmap_seek.c
147 changes: 147 additions & 0 deletions tests/zfs-tests/cmd/mmap_seek/mmap_seek.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
*/

#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <errno.h>

static void
seek_data(int fd, off_t offset, off_t expected)
{
off_t data_offset = lseek(fd, offset, SEEK_DATA);
if (data_offset != expected) {
fprintf(stderr, "lseek(fd, %d, SEEK_DATA) = %d (expected %d)\n",
(int)offset, (int)data_offset, (int)expected);
exit(2);
}
}

static void
seek_hole(int fd, off_t offset, off_t expected)
{
off_t hole_offset = lseek(fd, offset, SEEK_HOLE);
if (hole_offset != expected) {
fprintf(stderr, "lseek(fd, %d, SEEK_HOLE) = %d (expected %d)\n",
(int)offset, (int)hole_offset, (int)expected);
exit(2);
}
}

int
main(int argc, char **argv)
{
char *execname = argv[0];
char *file_path = argv[1];
char *buf = NULL;
int err;

if (argc != 4) {
(void) printf("usage: %s <file name> <file size> "
"<block size>\n", argv[0]);
exit(1);
}

int fd = open(file_path, O_RDWR | O_CREAT, 0666);
if (fd == -1) {
(void) fprintf(stderr, "%s: %s: ", execname, file_path);
perror("open");
exit(2);
}

off_t file_size = atoi(argv[2]);
off_t block_size = atoi(argv[3]);

if (block_size * 2 > file_size) {
(void) fprintf(stderr, "file size must be at least "
"double the block size\n");
exit(2);
}

err = ftruncate(fd, file_size);
if (err == -1) {
perror("ftruncate");
exit(2);
}

if ((buf = mmap(NULL, file_size, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0)) == MAP_FAILED) {
perror("mmap");
exit(2);
}

/* Verify the file is sparse and reports no data. */
seek_data(fd, 0, -1);

/* Verify the file is reported as a hole. */
seek_hole(fd, 0, 0);

/* Verify search beyond end of file is an error. */
seek_data(fd, 2 * file_size, -1);
seek_hole(fd, 2 * file_size, -1);

/* Dirty the first byte. */
memset(buf, 'a', 1);
seek_data(fd, 0, 0);
seek_data(fd, block_size, -1);
seek_hole(fd, 0, block_size);
seek_hole(fd, block_size, block_size);

/* Dirty the first half of the file. */
memset(buf, 'b', file_size / 2);
seek_data(fd, 0, 0);
seek_data(fd, block_size, block_size);
seek_hole(fd, 0, P2ROUNDUP(file_size / 2, block_size));
seek_hole(fd, block_size, P2ROUNDUP(file_size / 2, block_size));

/* Dirty the whole file. */
memset(buf, 'c', file_size);
seek_data(fd, 0, 0);
seek_data(fd, file_size * 3 / 4,
P2ROUNDUP(file_size * 3 / 4, block_size));
seek_hole(fd, 0, file_size);
seek_hole(fd, file_size / 2, file_size);

/* Punch a hole (required compression be enabled). */
memset(buf + block_size, 0, block_size);
seek_data(fd, 0, 0);
seek_data(fd, block_size, 2 * block_size);
seek_hole(fd, 0, block_size);
seek_hole(fd, block_size, block_size);
seek_hole(fd, 2 * block_size, file_size);

err = munmap(buf, file_size);
if (err == -1) {
perror("munmap");
exit(2);
}

close(fd);

return (0);
}
1 change: 1 addition & 0 deletions tests/zfs-tests/include/commands.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ export ZFSTEST_FILES='badsend
mktree
mmap_exec
mmap_libaio
mmap_seek
mmapwrite
nvlist_to_lua
randfree_file
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/include/tunables.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ DEADMAN_FAILMODE deadman.failmode zfs_deadman_failmode
DEADMAN_SYNCTIME_MS deadman.synctime_ms zfs_deadman_synctime_ms
DEADMAN_ZIOTIME_MS deadman.ziotime_ms zfs_deadman_ziotime_ms
DISABLE_IVSET_GUID_CHECK disable_ivset_guid_check zfs_disable_ivset_guid_check
DMU_OFFSET_NEXT_SYNC dmu_offset_next_sync zfs_dmu_offset_next_sync
INITIALIZE_CHUNK_SIZE initialize_chunk_size zfs_initialize_chunk_size
INITIALIZE_VALUE initialize_value zfs_initialize_value
KEEP_LOG_SPACEMAPS_AT_EXPORT keep_log_spacemaps_at_export zfs_keep_log_spacemaps_at_export
Expand Down
3 changes: 2 additions & 1 deletion tests/zfs-tests/tests/functional/mmap/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ dist_pkgdata_SCRIPTS = \
cleanup.ksh \
mmap_read_001_pos.ksh \
mmap_write_001_pos.ksh \
mmap_libaio_001_pos.ksh
mmap_libaio_001_pos.ksh \
mmap_seek_001_pos.ksh

dist_pkgdata_DATA = \
mmap.cfg
Loading