From ec2bb368f11036ce9f34bd7dac875a4ea0d3e492 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 24 Sep 2024 15:43:48 +1000 Subject: [PATCH 1/5] Linux 6.12: avoid kmem_cache_create redefinition torvalds/linux@b2e7456b5c25 makes kmem_cache_create() a macro, which gets in the way of our own redefinition, so we undef the macro first for our own clients. This follows what we did for kmem_cache_alloc(), see e951dba48. Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris --- include/os/linux/spl/sys/kmem_cache.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h index cfdb0ba750f7..71048db4ec5a 100644 --- a/include/os/linux/spl/sys/kmem_cache.h +++ b/include/os/linux/spl/sys/kmem_cache.h @@ -200,6 +200,7 @@ extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache); /* Avoid conflicts with kernel names that might be implemented as macros. */ #undef kmem_cache_alloc +#undef kmem_cache_create #define kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl) \ spl_kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl) From a0678c6ece999ccce9d29fa9a1eea1cddd6e96be Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 24 Sep 2024 16:06:14 +1000 Subject: [PATCH 2/5] Linux 6.12: support 3arg dequeue_signal() without task param See torvalds/linux@a2b80ce87a87. It claims the task arg is always `current`, and so it is with us, so this is a safe change to make. The only spanner is that we also support the older pre-5.17 3-arg dequeue_signal() which had a different meaning, so we have to check the types to get the right one. 
Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris --- config/kernel-kthread.m4 | 37 ++++++++++++++++++++++++++------ module/os/linux/spl/spl-thread.c | 18 +++++++++------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/config/kernel-kthread.m4 b/config/kernel-kthread.m4 index f5b824d7947a..4d580efead6b 100644 --- a/config/kernel-kthread.m4 +++ b/config/kernel-kthread.m4 @@ -15,7 +15,7 @@ AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_COMPLETE_AND_EXIT], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG], [ +AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL], [ dnl # dnl # 5.17 API: enum pid_type * as new 4th dequeue_signal() argument, dnl # 5768d8906bc23d512b1a736c1e198aa833a6daa4 ("signal: Requeue signals in the appropriate queue") @@ -23,12 +23,24 @@ AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG], [ dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info); dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); dnl # + dnl # 6.12 API: first arg struct task_struct * removed + dnl # int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); + dnl # AC_MSG_CHECKING([whether dequeue_signal() takes 4 arguments]) - ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal], [ + ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal_4arg], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DEQUEUE_SIGNAL_4ARG, 1, [dequeue_signal() takes 4 arguments]) + AC_DEFINE(HAVE_DEQUEUE_SIGNAL_4ARG, 1, + [dequeue_signal() takes 4 arguments]) ], [ AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether dequeue_signal() takes a task argument]) + ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal_3arg_task], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DEQUEUE_SIGNAL_3ARG_TASK, 1, + [dequeue_signal() takes a task argument]) + ], [ + AC_MSG_RESULT(no) + ]) ]) ]) @@ -43,8 +55,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_COMPLETE_AND_EXIT], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG], [ 
- ZFS_LINUX_TEST_SRC([kthread_dequeue_signal], [ +AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL], [ + ZFS_LINUX_TEST_SRC([kthread_dequeue_signal_3arg_task], [ + #include <linux/sched/signal.h> + ], [ + struct task_struct *task = NULL; + sigset_t *mask = NULL; + kernel_siginfo_t *info = NULL; + int error __attribute__ ((unused)); + + error = dequeue_signal(task, mask, info); + ]) + + ZFS_LINUX_TEST_SRC([kthread_dequeue_signal_4arg], [ #include <linux/sched/signal.h> ], [ struct task_struct *task = NULL; @@ -59,10 +82,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG], [ AC_DEFUN([ZFS_AC_KERNEL_KTHREAD], [ ZFS_AC_KERNEL_KTHREAD_COMPLETE_AND_EXIT - ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG + ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD], [ ZFS_AC_KERNEL_SRC_KTHREAD_COMPLETE_AND_EXIT - ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG + ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL ]) diff --git a/module/os/linux/spl/spl-thread.c b/module/os/linux/spl/spl-thread.c index 80acd0201b3b..7f74d44f91ff 100644 --- a/module/os/linux/spl/spl-thread.c +++ b/module/os/linux/spl/spl-thread.c @@ -162,20 +162,22 @@ issig(void) if (!signal_pending(current)) return (0); - struct task_struct *task = current; spl_kernel_siginfo_t __info; sigset_t set; siginitsetinv(&set, 1ULL << (SIGSTOP - 1) | 1ULL << (SIGTSTP - 1)); - sigorsets(&set, &task->blocked, &set); + sigorsets(&set, &current->blocked, &set); - spin_lock_irq(&task->sighand->siglock); -#ifdef HAVE_DEQUEUE_SIGNAL_4ARG + spin_lock_irq(&current->sighand->siglock); +#if defined(HAVE_DEQUEUE_SIGNAL_4ARG) enum pid_type __type; - if (dequeue_signal(task, &set, &__info, &__type) != 0) { + if (dequeue_signal(current, &set, &__info, &__type) != 0) { +#elif defined(HAVE_DEQUEUE_SIGNAL_3ARG_TASK) + if (dequeue_signal(current, &set, &__info) != 0) { #else - if (dequeue_signal(task, &set, &__info) != 0) { + enum pid_type __type; + if (dequeue_signal(&set, &__info, &__type) != 0) { #endif - spin_unlock_irq(&task->sighand->siglock); + 
spin_unlock_irq(&current->sighand->siglock); kernel_signal_stop(); /* @@ -188,7 +190,7 @@ issig(void) return (0); } - spin_unlock_irq(&task->sighand->siglock); + spin_unlock_irq(&current->sighand->siglock); return (1); } From 975a8333a4ec172ba48bbcc566aa3b55cd3d2841 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 24 Sep 2024 16:19:18 +1000 Subject: [PATCH 3/5] Linux 6.12: FMODE_UNSIGNED_OFFSET is now FOP_UNSIGNED_OFFSET torvalds/linux@641bb4394f40 asserts that this is a static flag, not intended to be variable per-file, so it moves it to file_operations instead. We just change our check to follow. No configure check is necessary because FOP_UNSIGNED_OFFSET didn't exist before this commit, and the FMODE_UNSIGNED_OFFSET flag is removed in the same commit, so there's no chance of a conflict. It's not clear to me that we need this check at all, as we never set this flag on our own files, and I can't see any way that our llseek handler could receive a file from another filesystem. But, the whole zpl_llseek() has a number of opportunities for pleasing cleanup that have nothing to do with this change, so I'll leave that for a future change. 
Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris --- include/os/linux/kernel/linux/vfs_compat.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h index 075b9e111b10..b4767d1ece89 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -68,7 +68,11 @@ lseek_execute( loff_t offset, loff_t maxsize) { +#ifdef FMODE_UNSIGNED_OFFSET if (offset < 0 && !(filp->f_mode & FMODE_UNSIGNED_OFFSET)) +#else + if (offset < 0 && !(filp->f_op->fop_flags & FOP_UNSIGNED_OFFSET)) +#endif return (-EINVAL); if (offset > maxsize) From a2db8246104d79f9d718acf824fa7177c6aabb89 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 24 Sep 2024 16:44:01 +1000 Subject: [PATCH 4/5] Linux 6.12: f_version removed from struct file torvalds/linux@11068e0b64cb removes it, suggesting this was always there as a helper to handle concurrent seeks, which all filesystems now handle themselves if necessary. Without looking into the mechanism, I can imagine how it might have been used, but we have always set it to zero and never read from it, presumably because we've always tracked per-caller position through the znode anyway. So I don't see how there can be any functional change for us by removing it. I've stayed conservative though and left it in for older kernels, since it's clearly not hurting anything there. 
Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris --- config/kernel-file.m4 | 31 ++++++++++++++++++++++ config/kernel.m4 | 2 ++ include/os/linux/kernel/linux/vfs_compat.h | 2 ++ 3 files changed, 35 insertions(+) create mode 100644 config/kernel-file.m4 diff --git a/config/kernel-file.m4 b/config/kernel-file.m4 new file mode 100644 index 000000000000..31252544c745 --- /dev/null +++ b/config/kernel-file.m4 @@ -0,0 +1,31 @@ +dnl # +dnl # 6.12 removed f_version from struct file +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_F_VERSION], [ + ZFS_LINUX_TEST_SRC([file_f_version], [ + #include <linux/fs.h> + + static const struct file f __attribute__((unused)) = { + .f_version = 0, + }; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FILE_F_VERSION], [ + AC_MSG_CHECKING([whether file->f_version exists]) + ZFS_LINUX_TEST_RESULT([file_f_version], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FILE_F_VERSION, 1, + [file->f_version exists]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FILE], [ + ZFS_AC_KERNEL_FILE_F_VERSION +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE], [ + ZFS_AC_KERNEL_SRC_FILE_F_VERSION +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 628581c98abc..bfd8404cdbe2 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -127,6 +127,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SYNC_BDEV ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING + ZFS_AC_KERNEL_SRC_FILE case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE @@ -238,6 +239,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_MM_PAGE_SIZE ZFS_AC_KERNEL_MM_PAGE_MAPPING ZFS_AC_KERNEL_1ARG_ASSIGN_STR + ZFS_AC_KERNEL_FILE case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h index b4767d1ece89..3ed456dbf4c8 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -81,7 +81,9 @@ lseek_execute( if (offset != 
filp->f_pos) { spin_lock(&filp->f_lock); filp->f_pos = offset; +#ifdef HAVE_FILE_F_VERSION filp->f_version = 0; +#endif spin_unlock(&filp->f_lock); } From b67b5af1c90b354ecec0ff064c4411230d041abe Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 24 Sep 2024 17:17:00 +1000 Subject: [PATCH 5/5] Linux 6.12: PG_error flag was removed torvalds/linux@09022bc196d2 removes the flag, and the corresponding SetPageError() and ClearPageError() macros, with no replacement offered. Going back through the upstream history, use of this flag has been gradually removed over the last year as part of the long tail of converting everything to folios. Interesting tidbit comments from torvalds/linux@29e9412b250e and torvalds/linux@420e05d0de18 suggest that this flag has not been used meaningfully since page writeback failures started being recorded in errseq_t instead (the whole "fsyncgate" thing, ~2017, around torvalds/linux@8ed1e46aaf1b). Given that, it's possible that since perhaps Linux 4.13 we haven't been getting anything by setting the flag. I don't know if that's true and/or if there's something we should be doing instead, but my gut feel is that it's probably fine, as we only use the page cache as a proxy to allow mmap() to work, rather than backing IO with it. As such, I'm expecting that removing this will do no harm, but I'm leaving it in for older kernels to maintain status quo, and if there is an overall better way, that is left for a future change. 
Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris --- config/kernel-mm-page-flags.m4 | 24 +++++++++++++++++++++++ config/kernel.m4 | 2 ++ include/os/linux/kernel/linux/mm_compat.h | 14 +++++++++++++ module/os/linux/zfs/zfs_znode_os.c | 1 + 4 files changed, 41 insertions(+) create mode 100644 config/kernel-mm-page-flags.m4 diff --git a/config/kernel-mm-page-flags.m4 b/config/kernel-mm-page-flags.m4 new file mode 100644 index 000000000000..b1277118305d --- /dev/null +++ b/config/kernel-mm-page-flags.m4 @@ -0,0 +1,24 @@ +AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_FLAG_ERROR], [ + ZFS_LINUX_TEST_SRC([mm_page_flag_error], [ + #include <linux/page-flags.h> + + static enum pageflags + test_flag __attribute__((unused)) = PG_error; + ]) +]) +AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_FLAG_ERROR], [ + AC_MSG_CHECKING([whether PG_error flag is available]) + ZFS_LINUX_TEST_RESULT([mm_page_flag_error], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MM_PAGE_FLAG_ERROR, 1, [PG_error flag is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_FLAGS], [ + ZFS_AC_KERNEL_SRC_MM_PAGE_FLAG_ERROR +]) +AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_FLAGS], [ + ZFS_AC_KERNEL_MM_PAGE_FLAG_ERROR +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index bfd8404cdbe2..556df58082f9 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -125,6 +125,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ ZFS_AC_KERNEL_SRC_SYNC_BDEV + ZFS_AC_KERNEL_SRC_MM_PAGE_FLAGS ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING ZFS_AC_KERNEL_SRC_FILE @@ -236,6 +237,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_COPY_SPLICE_READ ZFS_AC_KERNEL_SYNC_BDEV + ZFS_AC_KERNEL_MM_PAGE_FLAGS ZFS_AC_KERNEL_MM_PAGE_SIZE ZFS_AC_KERNEL_MM_PAGE_MAPPING ZFS_AC_KERNEL_1ARG_ASSIGN_STR diff --git a/include/os/linux/kernel/linux/mm_compat.h b/include/os/linux/kernel/linux/mm_compat.h index 
817f6df422de..4cdc0e099994 100644 --- a/include/os/linux/kernel/linux/mm_compat.h +++ b/include/os/linux/kernel/linux/mm_compat.h @@ -40,4 +40,18 @@ #define page_mapping(p) folio_mapping(page_folio(p)) #endif +/* + * 6.12 removed PG_error, SetPageError and ClearPageError, with no direct + * replacement, because page writeback errors are recorded elsewhere. Since we + * only use the page cache to assist with mmap(), never directly backing it + * with IO, it shouldn't be possible for this condition to occur on our pages + * anyway, even if this is the right way to report it. So it should be safe + * to remove, but for avoidance of doubt, we make it a no-op on 6.12 and leave + * it for everything else. + */ +#ifndef HAVE_MM_PAGE_FLAG_ERROR +#define SetPageError(p) do {} while (0) +#define ClearPageError(p) do {} while (0) +#endif + #endif /* _ZFS_MM_COMPAT_H */ diff --git a/module/os/linux/zfs/zfs_znode_os.c b/module/os/linux/zfs/zfs_znode_os.c index cea15ad4bc96..bc1e17f086d9 100644 --- a/module/os/linux/zfs/zfs_znode_os.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -58,6 +58,7 @@ #include #include #include +#include #include "zfs_prop.h" #include "zfs_comutil.h"