Skip to content

Commit

Permalink
Tiered early abort, zstd edition.
Browse files Browse the repository at this point in the history
It turns out that "do LZ4 and zstd-1 both fail" is a great heuristic
for "don't even bother trying higher zstd tiers".

By way of illustration:
$ cat /incompress | mbuffer | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_normal
summary: 39.8 GiByte in  3min 40.2sec - average of  185 MiB/s
$ echo 3 | sudo tee /sys/module/zzstd/parameters/zstd_lz4_pass
3
$ cat /incompress | mbuffer -m 4G | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_patched
summary: 39.8 GiByte in 48.6sec - average of  839 MiB/s
$ sudo zfs list -p -o name,used,lused,ratio evenfaster/lowcomp_1M_zstd12_normal evenfaster/lowcomp_1M_zstd12_patched
NAME                                         USED        LUSED  RATIO
evenfaster/lowcomp_1M_zstd12_normal   39549931520  42721221632   1.08
evenfaster/lowcomp_1M_zstd12_patched  39626399744  42721217536   1.07
$ python3 -c "print(39626399744 - 39549931520)"
76468224
$

I'll take 76 MB out of 42 GB for > 4x speedup.

Signed-off-by: Rich Ercolani <rincebrain@gmail.com>
  • Loading branch information
rincebrain committed Mar 17, 2022
1 parent 2feba9a commit 52d9d68
Show file tree
Hide file tree
Showing 17 changed files with 206 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ cstyle:
! -path './module/zstd/lib/*' \
! -path './include/sys/lua/*' \
! -path './module/lua/l*.[ch]' \
! -path './module/zfs/lz4.c' \
! -path './module/zcommon/lz4.c' \
$(cstyle_line)

filter_executable = -exec test -x '{}' \; -print
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ AC_CONFIG_FILES([
module/lua/Makefile
module/nvpair/Makefile
module/os/linux/spl/Makefile
module/os/linux/zcommon/Makefile
module/os/linux/zfs/Makefile
module/spl/Makefile
module/unicode/Makefile
Expand Down
1 change: 1 addition & 0 deletions lib/libzpool/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ VPATH = \
$(top_srcdir)/module/zfs \
$(top_srcdir)/module/zcommon \
$(top_srcdir)/module/lua \
$(top_srcdir)/module/os/linux/zcommon \
$(top_srcdir)/module/os/linux/zfs \
$(top_srcdir)/lib/libzpool

Expand Down
2 changes: 2 additions & 0 deletions lib/libzpool/kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,7 @@ kernel_init(int mode)
(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
(mode & SPA_MODE_WRITE) ? get_system_hostid() : 0);

zfs_dbgmsg_init();
random_init();

VERIFY0(uname(&hw_utsname));
Expand Down Expand Up @@ -854,6 +855,7 @@ kernel_fini(void)
system_taskq_fini();

random_fini();
zfs_dbgmsg_fini();
}

uid_t
Expand Down
9 changes: 9 additions & 0 deletions module/os/linux/zcommon/Makefile.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#
# Linux specific sources included from module/zcommon/Makefile.in
#

# Suppress unused-value warnings in sparc64 architecture headers
ccflags-$(CONFIG_SPARC64) += -Wno-unused-value

$(MODULE)-objs += ../os/linux/zcommon/zfs_debug.o
$(MODULE)-objs += ../os/linux/zcommon/trace.o
43 changes: 43 additions & 0 deletions module/os/linux/zcommon/trace.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Each DTRACE_PROBE must define its trace point in one (and only one)
* source file, so this dummy file exists for that purpose.
*/

#include <sys/multilist.h>
#include <sys/arc_impl.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
#include <sys/dbuf.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dmu_tx.h>
#include <sys/dnode.h>
#include <sys/zfs_znode.h>
#include <sys/zil_impl.h>

#ifdef _KERNEL
#define CREATE_TRACE_POINTS
#include <sys/trace.h>
#include <sys/trace_dbgmsg.h>
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@ static procfs_list_t zfs_dbgmsgs;
static int zfs_dbgmsg_size = 0;
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */

#ifdef ZFS_DEBUG
/*
* Everything except dprintf, set_error, spa, and indirect_remap is on
* by default in debug builds.
*/
int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SET_ERROR |
ZFS_DEBUG_INDIRECT_REMAP);
#else
int zfs_flags = 0;
#endif


/*
* Internal ZFS debug messages are enabled by default.
*
Expand Down Expand Up @@ -110,7 +122,6 @@ zfs_dbgmsg_fini(void)
{
procfs_list_uninstall(&zfs_dbgmsgs);
zfs_dbgmsg_purge(0);

/*
* TODO - decide how to make this permanent
*/
Expand Down Expand Up @@ -255,3 +266,10 @@ MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log");
module_param(zfs_dbgmsg_maxsize, int, 0644);
MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size");
#endif

EXPORT_SYMBOL(zfs_dbgmsg_enable);
EXPORT_SYMBOL(__dprintf);
EXPORT_SYMBOL(__set_error);
EXPORT_SYMBOL(zfs_dbgmsg_init);
EXPORT_SYMBOL(zfs_dbgmsg_fini);
EXPORT_SYMBOL(zfs_flags);
1 change: 0 additions & 1 deletion module/os/linux/zfs/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ $(MODULE)-objs += ../os/linux/zfs/vdev_disk.o
$(MODULE)-objs += ../os/linux/zfs/vdev_file.o
$(MODULE)-objs += ../os/linux/zfs/zfs_acl.o
$(MODULE)-objs += ../os/linux/zfs/zfs_ctldir.o
$(MODULE)-objs += ../os/linux/zfs/zfs_debug.o
$(MODULE)-objs += ../os/linux/zfs/zfs_dir.o
$(MODULE)-objs += ../os/linux/zfs/zfs_file_os.o
$(MODULE)-objs += ../os/linux/zfs/zfs_ioctl_os.o
Expand Down
1 change: 0 additions & 1 deletion module/os/linux/zfs/trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
#include <sys/trace.h>
#include <sys/trace_acl.h>
#include <sys/trace_arc.h>
#include <sys/trace_dbgmsg.h>
#include <sys/trace_dbuf.h>
#include <sys/trace_dmu.h>
#include <sys/trace_dnode.h>
Expand Down
7 changes: 7 additions & 0 deletions module/zcommon/Makefile.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
ifneq ($(KBUILD_EXTMOD),)
src = @abs_srcdir@
obj = @abs_builddir@
mfdir = $(obj)
else
mfdir = $(srctree)/$(src)
endif

MODULE := zcommon
Expand All @@ -11,6 +14,8 @@ obj-$(CONFIG_ZFS) := $(MODULE).o
ccflags-$(CONFIG_SPARC64) += -Wno-unused-value

$(MODULE)-objs += cityhash.o
$(MODULE)-objs += lz4.o
$(MODULE)-objs += lz4_zfs.o
$(MODULE)-objs += zfeature_common.o
$(MODULE)-objs += zfs_comutil.o
$(MODULE)-objs += zfs_deleg.o
Expand All @@ -26,3 +31,5 @@ $(MODULE)-$(CONFIG_X86) += zfs_fletcher_intel.o
$(MODULE)-$(CONFIG_X86) += zfs_fletcher_sse.o
$(MODULE)-$(CONFIG_X86) += zfs_fletcher_avx512.o
$(MODULE)-$(CONFIG_ARM64) += zfs_fletcher_aarch64_neon.o

include $(mfdir)/../os/linux/zcommon/Makefile
1 change: 1 addition & 0 deletions module/zfs/lz4.c → module/zcommon/lz4.c
Original file line number Diff line number Diff line change
Expand Up @@ -985,3 +985,4 @@ int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int compres
endOnInputSize, decode_full_block, noDict,
(BYTE*)dest, NULL, 0);
}

6 changes: 6 additions & 0 deletions module/zfs/lz4_zfs.c → module/zcommon/lz4_zfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -878,3 +878,9 @@ lz4_fini(void)
lz4_cache = NULL;
}
}

EXPORT_SYMBOL(lz4_compress_zfs);
EXPORT_SYMBOL(lz4_init);
EXPORT_SYMBOL(lz4_fini);
EXPORT_SYMBOL(lz4_decompress_zfs);

2 changes: 2 additions & 0 deletions module/zcommon/zfs_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,7 @@ EXPORT_SYMBOL(zfs_kfpu_fpregs);
static int __init
zcommon_init(void)
{
zfs_dbgmsg_init();
int error = kfpu_init();
if (error)
return (error);
Expand All @@ -1023,6 +1024,7 @@ zcommon_fini(void)
{
fletcher_4_fini();
kfpu_fini();
zfs_dbgmsg_fini();
}

module_init_early(zcommon_init);
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ $(MODULE)-objs += edonr_zfs.o
$(MODULE)-objs += fm.o
$(MODULE)-objs += gzip.o
$(MODULE)-objs += hkdf.o
$(MODULE)-objs += lz4.o
$(MODULE)-objs += lz4_zfs.o
#$(MODULE)-objs += lz4.o
#$(MODULE)-objs += lz4_zfs.o
$(MODULE)-objs += lzjb.o
$(MODULE)-objs += metaslab.o
$(MODULE)-objs += mmp.o
Expand Down
2 changes: 0 additions & 2 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2287,7 +2287,6 @@ void
dmu_init(void)
{
abd_init();
zfs_dbgmsg_init();
sa_cache_init();
dmu_objset_init();
dnode_init();
Expand All @@ -2309,7 +2308,6 @@ dmu_fini(void)
dnode_fini();
dmu_objset_fini();
sa_cache_fini();
zfs_dbgmsg_fini();
abd_fini();
}

Expand Down
10 changes: 2 additions & 8 deletions module/zfs/spa_misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,16 +244,10 @@ static avl_tree_t spa_l2cache_avl;

spa_mode_t spa_mode_global = SPA_MODE_UNINIT;

#ifdef ZFS_DEBUG
/*
* Everything except dprintf, set_error, spa, and indirect_remap is on
* by default in debug builds.
* dprintf and zfs_flags and friends have been moved to zcommon/zfs_prop.c.
*/
int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SET_ERROR |
ZFS_DEBUG_INDIRECT_REMAP);
#else
int zfs_flags = 0;
#endif
extern int zfs_flags;

/*
* zfs_recover can be set to nonzero to attempt to recover from
Expand Down
111 changes: 110 additions & 1 deletion module/zstd/zfs_zstd.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@
#include "lib/zstd.h"
#include "lib/common/zstd_errors.h"

int zstd_lz4_pass = 3;
int zstd_lz4_shift = 3;
int zstd_firstpass_level = ZIO_ZSTD_LEVEL_1;

kstat_t *zstd_ksp = NULL;

typedef struct zstd_stats {
Expand Down Expand Up @@ -386,6 +390,7 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
int16_t zstd_level;
zfs_zstdhdr_t *hdr;
ZSTD_CCtx *cctx;
size_t pass_len = 1; /* Default to > 0 to avoid tripping ifs below */

hdr = (zfs_zstdhdr_t *)d_start;

Expand All @@ -399,6 +404,102 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
ASSERT3U(d_len, <=, s_len);
ASSERT3U(zstd_level, !=, 0);

/*
* A zstd early abort heuristic.
*
* - Zeroth, if this is zstd-fast, don't try any of this, just go.
* - First, we try LZ4 compression, and if it doesn't early abort, we
* jump directly to whatever compression level we intended to try.
* - Second, if this is > zstd-3 (because experimentally that was the
* cutoff), we try zstd-1 - if that errors out (usually, but not
* exclusively, if it would overflow), we give up early.
*
* If it works, instead we go on and compress anyway.
*
* Why two passes? LZ4 alone gets you a lot of the way, but on highly
* compressible data, it was losing up to 8.5% of the compressed
* savings versus no early abort, and all the zstd-fast levels are
* worse indications on their own than LZ4, and don't improve the LZ4
* pass noticably if stacked like this.
*/

int newsz = (s_len - (s_len >> zstd_lz4_shift));
if (zstd_lz4_pass > 0 && zstd_level > 0) {
/*
* zfs_dbgmsg("DBG: s_len %x d_len %x zstd_lz4_pass %x "
* "zstd_lz4_shift %x",
* s_len, d_len, zstd_lz4_pass, zstd_lz4_shift);
*/
if (zstd_lz4_pass == 1 || zstd_lz4_pass > 2) {
pass_len = lz4_compress_zfs(s_start, hdr, s_len,
newsz, 0);
if (pass_len < newsz)
goto keep_trying;
}
if (zstd_lz4_pass > 1 && zstd_level > 3) {
cctx = ZSTD_createCCtx_advanced(zstd_malloc);

/*
* Out of kernel memory, gently fall through -
* this will disable compression in
* zio_compress_data
*/
if (!cctx) {
ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
return (s_len);
}

/* Set the compression level */
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel,
zstd_enum_to_level(zstd_firstpass_level,
&zstd_level));

/*
* Use the "magicless" zstd header which saves us 4
* header bytes
*/
ZSTD_CCtx_setParameter(cctx, ZSTD_c_format,
ZSTD_f_zstd1_magicless);

/*
* Disable redundant checksum calculation and content
* size storage since this is already done by ZFS
* itself.
*/
ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);


pass_len = ZSTD_compress2(cctx,
hdr,
newsz,
s_start, s_len);

ZSTD_freeCCtx(cctx);

/*
* Error in the compression routine, disable
* compression.
*/
if (ZSTD_isError(pass_len)) {
/*
* zfs_dbgmsg("zstd abort: level %x pass_len %x "
* "newsz %x", zstd_firstpass_level, pass_len,
* newsz);
*/
return (s_len);
}
}
if (pass_len <= 0 || pass_len >= newsz) {
/*
* zfs_dbgmsg("Length abort: pass_len %x newsz %x "
* "zstd_lz4_pass %x", pass_len, newsz,
* zstd_lz4_pass);
*/
return (s_len);
}
}
keep_trying:
cctx = ZSTD_createCCtx_advanced(zstd_malloc);

/*
Expand All @@ -423,9 +524,10 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);


c_len = ZSTD_compress2(cctx,
hdr->data,
d_len - sizeof (*hdr),
newsz - sizeof (*hdr),
s_start, s_len);

ZSTD_freeCCtx(cctx);
Expand Down Expand Up @@ -783,6 +885,13 @@ ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
ZFS_MODULE_LICENSE("Dual BSD/GPL");
ZFS_MODULE_VERSION(ZSTD_VERSION_STRING "a");

ZFS_MODULE_PARAM(zfs, zstd_, lz4_pass, INT, ZMOD_RW,
"Try an LZ4 pass to determine whether we should early abort.");
ZFS_MODULE_PARAM(zfs, zstd_, lz4_shift, INT, ZMOD_RW,
"How much savings do you need from LZ4 to conclude it's compressible.");
ZFS_MODULE_PARAM(zfs, zstd_, firstpass_level, INT, ZMOD_RW,
"If trying zstd_fast firstpass, what level to try.");

EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);
EXPORT_SYMBOL(zfs_zstd_decompress);
Expand Down

0 comments on commit 52d9d68

Please sign in to comment.