Skip to content

Commit

Permalink
Make use of kvmalloc if available and fix vmem_alloc implementation
Browse files Browse the repository at this point in the history
This patch uses kvmalloc for GFP_KERNEL allocations when kvmalloc is
available as a public kernel interface (since v4.12). This may improve
performance when the allocator can satisfy the request with physically
contiguous memory. Otherwise it simply falls back to virtually
contiguous memory (vmalloc).

Also fix the vmem_alloc implementation, which could lead to slow
allocations: the first attempt with kmalloc did not set the
__GFP_NORETRY flag and therefore told the Linux kernel to retry the
allocation several times before failing.

Signed-off-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Signed-off-by: Michael Niewöhner <foss@mniewoehner.de>
  • Loading branch information
c0d3z3r0 committed Jul 18, 2019
1 parent e9513c5 commit 4df62eb
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 14 deletions.
21 changes: 21 additions & 0 deletions config/kernel-kmem.m4
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,24 @@ AC_DEFUN([SPL_AC_DEBUG_KMEM_TRACKING], [
AC_MSG_CHECKING([whether detailed kmem tracking is enabled])
AC_MSG_RESULT([$enable_debug_kmem_tracking])
])

dnl #
dnl # 4.12 API
dnl #
dnl # kvmalloc allocation strategy
dnl #
AC_DEFUN([ZFS_AC_KERNEL_KVMALLOC], [
	dnl # The message must describe the real interface: kvmalloc takes a
	dnl # size and gfp flags, not a pointer.
	AC_MSG_CHECKING([whether kvmalloc(size, flags) is available])
	ZFS_LINUX_TRY_COMPILE([
		#include <linux/mm.h>
	],[
		void *p __attribute__ ((unused));
		p = kvmalloc(0, GFP_KERNEL);
	],[
		AC_MSG_RESULT([yes])
		AC_DEFINE(HAVE_KVMALLOC, 1, [kvmalloc exists])
	],[
		AC_MSG_RESULT([no])
	])
])
1 change: 1 addition & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_3ARGS_VFS_GETATTR
ZFS_AC_KERNEL_2ARGS_VFS_GETATTR
ZFS_AC_KERNEL_USLEEP_RANGE
ZFS_AC_KERNEL_KVMALLOC
ZFS_AC_KERNEL_KMEM_CACHE_ALLOCFLAGS
ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY
ZFS_AC_KERNEL_WAIT_ON_BIT
Expand Down
4 changes: 4 additions & 0 deletions include/spl/sys/kmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
#include <sys/debug.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

void *spl_kvmalloc(size_t size, gfp_t flags);

extern int kmem_debugging(void);
extern char *kmem_vasprintf(const char *fmt, va_list ap);
Expand Down
18 changes: 16 additions & 2 deletions module/spl/spl-kmem-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,21 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
ASSERT(ISP2(size));
ptr = (void *)__get_free_pages(lflags, get_order(size));
} else {
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
/*
* GFP_KERNEL allocations can safely use kvmalloc which may
* improve performance by avoiding a) high latency caused by
* vmalloc's on-access allocation, b) performance loss due to
* MMU memory address mapping and c) vmalloc locking overhead.
* This has the side-effect that the slab statistics will
* incorrectly report this as a vmem allocation, but that is
* purely cosmetic.
*/
if ((lflags & GFP_KERNEL) == GFP_KERNEL) {
ptr = spl_kvmalloc(size, lflags);
} else {
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
PAGE_KERNEL);
}
}

/* Resulting allocated memory will be page aligned */
Expand Down Expand Up @@ -231,7 +245,7 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
ASSERT(ISP2(size));
free_pages((unsigned long)ptr, get_order(size));
} else {
vfree(ptr);
spl_kmem_free_impl(ptr, size);
}
}

Expand Down
56 changes: 44 additions & 12 deletions module/spl/spl-kmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,32 @@ strfree(char *str)
}
EXPORT_SYMBOL(strfree);

#ifndef __GFP_REPEAT
/* __GFP_REPEAT was renamed to __GFP_RETRY_MAYFAIL in Linux 4.13. */
#define	__GFP_REPEAT	__GFP_RETRY_MAYFAIL
#endif

/*
 * spl_kvmalloc() - allocate memory, preferring physically contiguous
 * (kmalloc) backing and falling back to virtually contiguous memory
 * (__vmalloc) for multi-page requests that the page allocator cannot
 * satisfy.
 *
 * When the kernel exports kvmalloc (>= 4.12, per HAVE_KVMALLOC) and the
 * caller passes plain GFP_KERNEL, delegate to it directly; kvmalloc is
 * only intended for GFP_KERNEL-compatible contexts.  Otherwise emulate
 * its strategy by hand: attempt kmalloc first without hard retries, then
 * fall back to vmalloc.
 */
void *
spl_kvmalloc(size_t size, gfp_t flags)
{
	gfp_t first_flags = flags;
	void *ptr;

#ifdef HAVE_KVMALLOC
	if ((flags & GFP_KERNEL) == GFP_KERNEL)
		return (kvmalloc(size, flags));
#endif

	/*
	 * For requests larger than a page, suppress page-allocation
	 * failure warnings and — unless the caller explicitly requested
	 * retries via __GFP_REPEAT for a costly-order allocation — tell
	 * the allocator to give up quickly so we can fall back to
	 * vmalloc instead of stalling in reclaim.
	 */
	if (size > PAGE_SIZE) {
		first_flags |= __GFP_NOWARN;
		if ((size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
		    !(first_flags & __GFP_REPEAT))
			first_flags |= __GFP_NORETRY;
	}

	ptr = kmalloc_node(size, first_flags, NUMA_NO_NODE);
	if (ptr != NULL || size <= PAGE_SIZE)
		return (ptr);

	/* Physically contiguous allocation failed; use virtual memory. */
	return (__vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL));
}

/*
* General purpose unified implementation of kmem_alloc(). It is an
* amalgamation of Linux and Illumos allocator design. It should never be
Expand All @@ -144,7 +170,6 @@ inline void *
spl_kmem_alloc_impl(size_t size, int flags, int node)
{
gfp_t lflags = kmem_flags_convert(flags);
int use_vmem = 0;
void *ptr;

/*
Expand Down Expand Up @@ -177,30 +202,37 @@ spl_kmem_alloc_impl(size_t size, int flags, int node)
* acquired. Contention on this lock can significantly
* impact performance so frequently manipulating the virtual
* address space is strongly discouraged.
*
* GFP_KERNEL allocations can safely use kvmalloc which may
* improve performance by avoiding a) high latency caused by
* vmalloc's on-access allocation, b) performance loss due to
* MMU memory address mapping and c) vmalloc locking overhead.
* This has the side-effect that the slab statistics will
* incorrectly report this as a vmem allocation, but that is
* purely cosmetic.
*
* However, kvmalloc will only be used when a) sufficient
* contiguous memory is available and b) the allocated memory
* is less than spl_kmem_alloc_max.
*/
if ((size > spl_kmem_alloc_max) || use_vmem) {
if (size > spl_kmem_alloc_max) {
if (flags & KM_VMEM) {
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
PAGE_KERNEL);
} else {
return (NULL);
}
} else {
ptr = kmalloc_node(size, lflags, node);
if (flags & KM_VMEM) {
ptr = spl_kvmalloc(size, lflags);
} else {
ptr = kmalloc_node(size, lflags, node);
}
}

if (likely(ptr) || (flags & KM_NOSLEEP))
return (ptr);

/*
* For vmem_alloc() and vmem_zalloc() callers retry immediately
* using __vmalloc() which is unlikely to fail.
*/
if ((flags & KM_VMEM) && (use_vmem == 0)) {
use_vmem = 1;
continue;
}

/*
* Use cond_resched() instead of congestion_wait() to avoid
* deadlocking systems where there are no block devices.
Expand Down

0 comments on commit 4df62eb

Please sign in to comment.