diff --git a/.circleci/config.yml b/.circleci/config.yml
index 9738c1bd45b..4529743ea04 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -31,9 +31,9 @@ jobs:
command: |
make gnu90build; make clean
make gnu99build; make clean
- make ppc64build; make clean
- make ppcbuild ; make clean
- make armbuild ; make clean
+ make ppc64build V=1; make clean
+ make ppcbuild V=1; make clean
+ make armbuild V=1; make clean
make -C tests test-legacy test-longmatch; make clean
make -C lib libzstd-nomt; make clean
# This step is only run on release tags.
diff --git a/.cirrus.yml b/.cirrus.yml
index 8387ca1b1a5..fe17aacea5d 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -2,7 +2,7 @@ task:
name: FreeBSD (shortest)
freebsd_instance:
matrix:
- image_family: freebsd-12-1
+ image_family: freebsd-12-2
# The stable 11.3 image causes "Agent is not responding" so use a snapshot
image_family: freebsd-11-3-snap
install_script: pkg install -y gmake coreutils
diff --git a/.github/workflows/generic-dev.yml b/.github/workflows/generic-dev.yml
index 8882fcc0b6a..d6e9b44b426 100644
--- a/.github/workflows/generic-dev.yml
+++ b/.github/workflows/generic-dev.yml
@@ -41,18 +41,17 @@ jobs:
- uses: actions/checkout@v2
- name: make check on 32-bit
run: |
- make libc6install
+ sudo apt update
+ APT_PACKAGES="gcc-multilib" make apt-install
CFLAGS="-m32 -O1 -fstack-protector" make check V=1
- gcc-6-7-libzstd:
+ gcc-7-libzstd:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- - name: gcc-6 + gcc-7 + libzstdmt compilation
+ - name: gcc-7 + libzstdmt compilation
run: |
- make gcc6install gcc7install
- CC=gcc-6 CFLAGS=-Werror make -j all
- make clean
+ make gcc7install
CC=gcc-7 CFLAGS=-Werror make -j all
make clean
LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt
@@ -71,30 +70,28 @@ jobs:
make gcc8install
CC=gcc-8 CFLAGS="-Werror" make -j all
make clean
- CC=gcc-8 make -j uasan-test-zstd
+ Reference a prepared dictionary, to be used for all next compressed frames.
Note that compression parameters are enforced from within CDict,
and supersede any compression parameter previously set within CCtx.
- The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
The dictionary will remain valid for future compressed frames using same CCtx.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -867,6 +869,13 @@ Reference a prepared dictionary, to be used to decompress next frames.
The dictionary remains active for decompression of future frames using same DCtx.
+
+ If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
+ will store the DDict references in a table, and the DDict used for decompression
+ will be determined at decompression time, as per the dict ID in the frame.
+ The memory for the table is allocated on the first call to refDDict, and can be
+ freed with ZSTD_freeDCtx().
+
@result : 0, or an error code (which can be tested with ZSTD_isError()).
Note 1 : Currently, only one dictionary can be managed.
Referencing a new dictionary effectively "discards" any previous one.
@@ -995,6 +1004,12 @@ zstd 1.4.8 Manual
+zstd 1.4.9 Manual
Contents
@@ -473,12 +473,14 @@
Decompression context
When decompressing many times,
* ZSTD_d_format
* ZSTD_d_stableOutBuffer
* ZSTD_d_forceIgnoreChecksum
+ * ZSTD_d_refMultipleDDicts
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly
*/
ZSTD_d_experimentalParam1=1000,
ZSTD_d_experimentalParam2=1001,
- ZSTD_d_experimentalParam3=1002
+ ZSTD_d_experimentalParam3=1002,
+ ZSTD_d_experimentalParam4=1003
} ZSTD_dParameter;
@@ -816,7 +818,7 @@ Streaming decompression functions
Streaming decompression functions
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
Streaming decompression functions
ZSTD_d_ignoreChecksum = 1
} ZSTD_forceIgnoreChecksum_e;
+typedef enum {
+ /* Note: this enum controls ZSTD_d_refMultipleDDicts */
+ ZSTD_rmd_refSingleDDict = 0,
+ ZSTD_rmd_refMultipleDDicts = 1
+} ZSTD_refMultipleDDicts_e;
+
typedef enum {
/* Note: this enum and the behavior it controls are effectively internal
* implementation details of the compressor. They are expected to continue
@@ -1073,7 +1088,7 @@
Streaming decompression functions
`srcSize` must be the _exact_ size of this series
(i.e. there should be a frame boundary at `src + srcSize`)
@return : - upper-bound for the decompressed size of all data in all successive frames
- - if an error occured: ZSTD_CONTENTSIZE_ERROR
+ - if an error occurred: ZSTD_CONTENTSIZE_ERROR
note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame.
note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
@@ -1155,6 +1170,22 @@ Streaming decompression functions
size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); +Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + + Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, + ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + + Returns an error if destination buffer is not large enough, if the source size is not representable + with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + + @return : number of bytes written or a ZSTD error. + +
size_t ZSTD_estimateCCtxSize(int compressionLevel); @@ -1328,7 +1359,7 @@Streaming decompression functions
how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?)
size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); +size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);Get the requested compression parameter value, selected by enum ZSTD_cParameter, and store it into int* value. @return : 0, or an error code (which can be tested with ZSTD_isError()). @@ -1382,7 +1413,7 @@
Streaming decompression functions
-size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); +size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);Similar to ZSTD_CCtx_getParameter. Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. @result : 0, or an error code (which can be tested with ZSTD_isError()). diff --git a/examples/Makefile b/examples/Makefile index f5e3274b1aa..1787bf291de 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. +# Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/examples/common.h b/examples/common.h index 4492c7e4efa..a3f743ba3bb 100644 --- a/examples/common.h +++ b/examples/common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c index d9aad45a7b0..559977045da 100644 --- a/examples/dictionary_compression.c +++ b/examples/dictionary_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021 Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/dictionary_decompression.c b/examples/dictionary_decompression.c index 7e50986e37a..6bf77050e29 100644 --- a/examples/dictionary_decompression.c +++ b/examples/dictionary_decompression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. 
* All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/multiple_simple_compression.c b/examples/multiple_simple_compression.c index e409467b226..e03ce86805e 100644 --- a/examples/multiple_simple_compression.c +++ b/examples/multiple_simple_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c index 8a4dc96c112..5a92a31200c 100644 --- a/examples/multiple_streaming_compression.c +++ b/examples/multiple_streaming_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/simple_compression.c b/examples/simple_compression.c index 618080b338f..3d6c3ae9b67 100644 --- a/examples/simple_compression.c +++ b/examples/simple_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c index e108987c625..4a179d18548 100644 --- a/examples/simple_decompression.c +++ b/examples/simple_decompression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c index 045437f2873..6a039dc6483 100644 --- a/examples/streaming_compression.c +++ b/examples/streaming_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/streaming_compression_thread_pool.c b/examples/streaming_compression_thread_pool.c index 22c3b2efacc..7489eae9377 100644 --- a/examples/streaming_compression_thread_pool.c +++ b/examples/streaming_compression_thread_pool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Martin Liska, SUSE, Facebook, Inc. + * Copyright (c) 2021, Martin Liska, SUSE, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c index 26eda3441b7..3f8e46002b5 100644 --- a/examples/streaming_decompression.c +++ b/examples/streaming_decompression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/streaming_memory_usage.c b/examples/streaming_memory_usage.c index 37dd660e4a6..8c1e9be61c3 100644 --- a/examples/streaming_memory_usage.c +++ b/examples/streaming_memory_usage.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2017-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/Makefile b/lib/Makefile index 869d76630e6..39ff304f5cc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. +# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the @@ -179,6 +179,8 @@ ifeq ($(UNAME), Darwin) HASH ?= md5 else ifeq ($(UNAME), FreeBSD) HASH ?= gmd5sum +else ifeq ($(UNAME), NetBSD) + HASH ?= md5 -n else ifeq ($(UNAME), OpenBSD) HASH ?= md5 endif @@ -208,20 +210,17 @@ else endif SET_CACHE_DIRECTORY = \ - $(MAKE) --no-print-directory $@ \ + +$(MAKE) --no-print-directory $@ \ BUILD_DIR=obj/$(HASH_DIR) \ CPPFLAGS="$(CPPFLAGS)" \ CFLAGS="$(CFLAGS)" \ LDFLAGS="$(LDFLAGS)" -.PHONY: lib-all all clean install uninstall - -# alias -lib-all: all - +.PHONY: all all: lib + .PHONY: libzstd.a # must be run every time ifndef BUILD_DIR @@ -258,8 +257,8 @@ else # not Windows LIBZSTD = libzstd.$(SHARED_EXT_VER) .PHONY: $(LIBZSTD) # must be run every time -$(LIBZSTD): CFLAGS += -fPIC -$(LIBZSTD): LDFLAGS += -shared -fvisibility=hidden +$(LIBZSTD): CFLAGS += -fPIC -fvisibility=hidden +$(LIBZSTD): LDFLAGS += -shared ifndef BUILD_DIR # determine BUILD_DIR from compilation flags @@ -339,6 +338,7 @@ libzstd-nomt: $(ZSTD_NOMT_FILES) @echo files : $(ZSTD_NOMT_FILES) $(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ +.PHONY: clean clean: $(RM) -r *.dSYM # macOS-specific $(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc @@ -407,6 +407,7 @@ libzstd.pc: libzstd.pc.in -e 's|@VERSION@|$(VERSION)|' \ $< >$@ +.PHONY: install install: install-pc install-static install-shared install-includes @echo zstd static and shared library installed @@ -437,6 +438,7 @@ install-includes: $(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR) 
$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR) +.PHONY: uninstall uninstall: $(RM) $(DESTDIR)$(LIBDIR)/libzstd.a $(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index d9a2730104d..aa526b429f0 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -1,7 +1,7 @@ /* ****************************************************************** * bitstream * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 3e454f38c12..1142002b0a3 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -90,6 +90,7 @@ # endif #endif + /* target attribute */ #ifndef __has_attribute #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ diff --git a/lib/common/cpu.h b/lib/common/cpu.h index cb210593ead..1b4d26dda95 100644 --- a/lib/common/cpu.h +++ b/lib/common/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, Facebook, Inc. + * Copyright (c) 2018-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/debug.c b/lib/common/debug.c index f303f4a2e53..cd1742c74a4 100644 --- a/lib/common/debug.c +++ b/lib/common/debug.c @@ -1,7 +1,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. 
* * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/debug.h b/lib/common/debug.h index 8b5734366ca..b45cda8f0c7 100644 --- a/lib/common/debug.h +++ b/lib/common/debug.h @@ -1,7 +1,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index f9fcb1acfc3..033c075543c 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -1,6 +1,6 @@ /* ****************************************************************** * Common functions of New Generation Entropy library - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 45bba5305b5..b6db3801857 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/error_private.h b/lib/common/error_private.h index 71b37b8dfa4..9d36e891c06 100644 --- a/lib/common/error_private.h +++ b/lib/common/error_private.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/fse.h b/lib/common/fse.h index dd5fc44e809..52924469c9a 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -1,7 +1,7 @@ /* ****************************************************************** * FSE : Finite State Entropy codec * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index c164430f990..c575db0de31 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * FSE : Finite State Entropy decoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/huf.h b/lib/common/huf.h index 1afef90c7ca..43213e574ec 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -1,7 +1,7 @@ /* ****************************************************************** * huff0 huffman codec, * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/mem.h b/lib/common/mem.h index 4728ef767bf..9813bfc4235 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/pool.c b/lib/common/pool.c index 4c1b83376f4..5b092ccd220 100644 --- a/lib/common/pool.c +++ b/lib/common/pool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/pool.h b/lib/common/pool.h index 63954ca6ca4..d66942a0a60 100644 --- a/lib/common/pool.h +++ b/lib/common/pool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c index e708df3c338..f0ac38135bf 100644 --- a/lib/common/xxhash.c +++ b/lib/common/xxhash.c @@ -1,6 +1,6 @@ /* * xxHash - Fast Hash algorithm - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - xxHash homepage: http://www.xxhash.com diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h index eceb55d5e07..31c488bf5f4 100644 --- a/lib/common/xxhash.h +++ b/lib/common/xxhash.h @@ -1,7 +1,7 @@ /* * xxHash - Extremely Fast Hash algorithm * Header File - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - xxHash source repository : https://github.com/Cyan4973/xxHash diff --git a/lib/common/zstd_common.c b/lib/common/zstd_common.c index 939e9f08fad..009b466007f 100644 --- a/lib/common/zstd_common.c +++ b/lib/common/zstd_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/zstd_deps.h b/lib/common/zstd_deps.h index 0fb8b7818b8..eebc4fdfbdd 100644 --- a/lib/common/zstd_deps.h +++ b/lib/common/zstd_deps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/zstd_errors.h b/lib/common/zstd_errors.h index 6d0d0030043..3ba57e1521f 100644 --- a/lib/common/zstd_errors.h +++ b/lib/common/zstd_errors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 0991f20a086..53a982c7341 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/zstd_trace.c b/lib/common/zstd_trace.c new file mode 100644 index 00000000000..36f3b5d69bd --- /dev/null +++ b/lib/common/zstd_trace.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include "zstd_trace.h" +#include "../zstd.h" + +#include "compiler.h" + +#if ZSTD_TRACE && ZSTD_HAVE_WEAK_SYMBOLS + +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx) +{ + (void)cctx; + return 0; +} + +ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace) +{ + (void)ctx; + (void)trace; +} + +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx) +{ + (void)dctx; + return 0; +} + +ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace) +{ + (void)ctx; + (void)trace; +} + +#endif diff --git a/lib/common/zstd_trace.h b/lib/common/zstd_trace.h new file mode 100644 index 00000000000..487617cf634 --- /dev/null +++ b/lib/common/zstd_trace.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_TRACE_H +#define ZSTD_TRACE_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include <stddef.h>
+ +/* weak symbol support */ +#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \ + !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) +# define ZSTD_HAVE_WEAK_SYMBOLS 1 +#else +# define ZSTD_HAVE_WEAK_SYMBOLS 0 +#endif +#if ZSTD_HAVE_WEAK_SYMBOLS +# define ZSTD_WEAK_ATTR __attribute__((__weak__)) +#else +# define ZSTD_WEAK_ATTR +#endif + +/* Only enable tracing when weak symbols are available. */ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif + +#if ZSTD_TRACE + +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; + +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. + */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; + /** + * The ZSTD_CCtx pointer (NULL on decompression). + */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). 
+ * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. + */ +typedef unsigned long long ZSTD_TraceCtx; + +/** + * Trace the beginning of a compression call. + * @param cctx The cctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx); + +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_decompress_end(). + */ +ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx); + +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). + * @param trace The zstd tracing info. 
+ */ +void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +#endif /* ZSTD_TRACE */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_TRACE_H */ diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 304a82b3cc4..887bdc85864 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * FSE : Finite State Entropy encoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/compress/hist.c b/lib/compress/hist.c index a9659d11ad0..9af1aa1a361 100644 --- a/lib/compress/hist.c +++ b/lib/compress/hist.c @@ -1,7 +1,7 @@ /* ****************************************************************** * hist : Histogram functions * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/compress/hist.h b/lib/compress/hist.h index fb9ead6834f..43c578799bb 100644 --- a/lib/compress/hist.h +++ b/lib/compress/hist.h @@ -1,7 +1,7 @@ /* ****************************************************************** * hist : Histogram functions * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. 
* * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 302e08864da..71438d7a86d 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * Huffman encoder, part of New Generation Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 386b051df63..93c4075c521 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -14,6 +14,7 @@ #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ #include "../common/cpu.h" #include "../common/mem.h" +#include "../common/zstd_trace.h" #include "hist.h" /* HIST_countFast_wksp */ #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ #include "../common/fse.h" @@ -269,29 +270,46 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) return 0; } +#define ZSTD_NO_CLEVEL 0 + +/** + * Initializes the cctxParams from params and compressionLevel. + * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. 
+ */ +static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) +{ + assert(!ZSTD_checkCParams(params->cParams)); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = compressionLevel; +} + size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) { RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); - assert(!ZSTD_checkCParams(params.cParams)); - cctxParams->cParams = params.cParams; - cctxParams->fParams = params.fParams; - cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL); return 0; } -/* ZSTD_assignParamsToCCtxParams() : - * params is presumed valid at this stage */ -static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) +/** + * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. + * @param params Validated zstd parameters. + */ +static void ZSTD_CCtxParams_setZstdParams( + ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) { - ZSTD_CCtx_params ret = *cctxParams; assert(!ZSTD_checkCParams(params->cParams)); - ret.cParams = params->cParams; - ret.fParams = params->fParams; - ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - return ret; + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. 
+ */ + cctxParams->compressionLevel = ZSTD_NO_CLEVEL; } ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) @@ -796,13 +814,13 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, } } -size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) { return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); } size_t ZSTD_CCtxParams_getParameter( - ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) { switch(param) { @@ -1188,15 +1206,26 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); assert(ZSTD_checkCParams(cPar)==0); - if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) - srcSize = minSrcSize; - switch (mode) { - case ZSTD_cpm_noAttachDict: case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + /* If we don't know the source size, don't make any + * assumptions about it. We will already have selected + * smaller parameters if a dictionary is in use. + */ + break; case ZSTD_cpm_createCDict: + /* Assume a small source size when creating a dictionary + * with an unkown source size. + */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; break; case ZSTD_cpm_attachDict: + /* Dictionary has its own dedicated parameters which have + * already been selected. We are selecting parameters + * for only the source. 
+ */ dictSize = 0; break; default: @@ -1213,7 +1242,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, ZSTD_highbit32(tSize-1) + 1; if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; } - { U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; if (cycleLog > dictAndWindowLog) @@ -1615,7 +1645,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams); assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); assert(params.ldmParams.hashRateLog < 32); - zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); } { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); @@ -1692,6 +1721,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, XXH64_reset(&zc->xxhState, 0); zc->stage = ZSTDcs_init; zc->dictID = 0; + zc->dictContentSize = 0; ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); @@ -1711,11 +1741,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, /* ldm bucketOffsets table */ if (params.ldmParams.enableLdm) { /* TODO: avoid memset? 
*/ - size_t const ldmBucketSize = + size_t const numBuckets = ((size_t)1) << (params.ldmParams.hashLog - params.ldmParams.bucketSizeLog); - zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); - ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); + ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); } /* sequences storage */ @@ -1852,6 +1882,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, } } cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; /* copy block state */ ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); @@ -1915,6 +1946,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, } cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; /* copy block state */ ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); @@ -2005,6 +2037,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; } dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; /* copy block state */ ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); @@ -2954,6 +2987,26 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, return pos; } +/* ZSTD_writeSkippableFrame_advanced() : + * Writes out a skippable frame with the specified magic number variant (16 are supported), + * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data. + * + * Returns the total number of bytes written, or a ZSTD error code. 
+ */ +size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant) { + BYTE* op = (BYTE*)dst; + RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */, + dstSize_tooSmall, "Not enough room for skippable frame"); + RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame"); + RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported"); + + MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant)); + MEM_writeLE32(op+4, (U32)srcSize); + ZSTD_memcpy(op+8, src, srcSize); + return srcSize + ZSTD_SKIPPABLEHEADERSIZE; +} + /* ZSTD_writeLastEmptyBlock() : * output an empty Block with end-of-frame mark to complete a frame * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) @@ -3348,6 +3401,9 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { +#if ZSTD_TRACE + cctx->traceCtx = ZSTD_trace_compress_begin(cctx); +#endif DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); @@ -3377,6 +3433,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; + cctx->dictContentSize = cdict ? 
cdict->dictContentSize : dictSize; } return 0; } @@ -3405,8 +3462,8 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize) { - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params); + ZSTD_CCtx_params cctxParams; + ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL); return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL /*cdict*/, @@ -3415,9 +3472,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params); + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); + ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT : compressionLevel); + } DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); @@ -3471,6 +3530,30 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) return op-ostart; } +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) +{ +#if ZSTD_TRACE + if (cctx->traceCtx) { + int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0; + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + trace.dictionaryID = cctx->dictID; + trace.dictionarySize = cctx->dictContentSize; + trace.uncompressedSize = cctx->consumedSrcSize; + trace.compressedSize = cctx->producedCSize + extraCSize; + trace.params = &cctx->appliedParams; + trace.cctx = cctx; + ZSTD_trace_compress_end(cctx->traceCtx, &trace); + } + cctx->traceCtx = 0; +#else + (void)cctx; + (void)extraCSize; +#endif +} + size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) @@ -3493,38 +3576,25 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize); } + ZSTD_CCtx_trace(cctx, endResult); return cSize + endResult; } -static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_parameters* params) -{ - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); - DEBUGLOG(4, "ZSTD_compress_internal"); - return ZSTD_compress_advanced_internal(cctx, - dst, dstCapacity, - src, srcSize, - dict, dictSize, - &cctxParams); -} - size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, 
size_t srcSize, const void* dict,size_t dictSize, ZSTD_parameters params) { + ZSTD_CCtx_params cctxParams; DEBUGLOG(4, "ZSTD_compress_advanced"); FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); - return ZSTD_compress_internal(cctx, - dst, dstCapacity, - src, srcSize, - dict, dictSize, - &params); + ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + &cctxParams); } /* Internal */ @@ -3548,10 +3618,13 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params); + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT: compressionLevel); + } DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); - assert(params.fParams.contentSizeFlag == 1); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); } @@ -3698,7 +3771,7 @@ static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, assert(cdict != NULL); ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; - cdict->compressionLevel = 0; /* signals advanced API usage */ + cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ return cdict; } @@ -3881,34 +3954,37 @@ size_t ZSTD_compressBegin_usingCDict_advanced( ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { + ZSTD_CCtx_params cctxParams; DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); - { ZSTD_CCtx_params params = cctx->requestedParams; + /* Initialize the cctxParams from the cdict */ + { + ZSTD_parameters params; + params.fParams = fParams; params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || cdict->compressionLevel == 0 ) - && (params.attachDictPref != ZSTD_dictForceLoad) ? + || cdict->compressionLevel == 0 ) ? ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, pledgedSrcSize, cdict->dictContentSize); - /* Increase window log to fit the entire dictionary and source if the - * source size is known. Limit the increase to 19, which is the - * window log for compression level 1 with the largest source size. - */ - if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { - U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); - U32 const limitedSrcLog = limitedSrcSize > 1 ? 
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; - params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); - } - params.fParams = fParams; - return ZSTD_compressBegin_internal(cctx, - NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, - cdict, - &params, pledgedSrcSize, - ZSTDb_not_buffered); + ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel); } + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); + } + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + &cctxParams, pledgedSrcSize, + ZSTDb_not_buffered); } /* ZSTD_compressBegin_usingCDict() : @@ -4071,7 +4147,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, &params); + ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params); FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); return 0; } @@ -4376,6 +4452,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ } if (params.nbWorkers > 0) { +#if ZSTD_TRACE + cctx->traceCtx = ZSTD_trace_compress_begin(cctx); +#endif /* mt context creation */ if (cctx->mtctx == NULL) { DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", @@ -4389,6 
+4468,10 @@ 
static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, cctx->mtctx, prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0; + cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; cctx->streamStage = zcss_load; cctx->appliedParams = params; } else @@ -4450,8 +4533,12 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, size_t const ipos = input->pos; size_t const opos = output->pos; flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + cctx->consumedSrcSize += (U64)(input->pos - ipos); + cctx->producedCSize += (U64)(output->pos - opos); if ( ZSTD_isError(flushMin) || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + if (flushMin == 0) + ZSTD_CCtx_trace(cctx, 0); ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); } FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index c04998b8b18..6083ed66418 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -19,6 +19,7 @@ * Dependencies ***************************************/ #include "../common/zstd_internal.h" +#include "../common/zstd_trace.h" /* ZSTD_TraceCtx */ #include "zstd_cwksp.h" #ifdef ZSTD_MULTITHREAD # include "zstdmt_compress.h" @@ -183,13 +184,22 @@ typedef struct { U32 checksum; } ldmEntry_t; +typedef struct { + BYTE const* split; + U32 hash; + U32 checksum; + ldmEntry_t* bucket; +} ldmMatchCandidate_t; + +#define LDM_BATCH_SIZE 64 + typedef struct { ZSTD_window_t window; /* State for the window round buffer management */ ldmEntry_t* hashTable; U32 loadedDictEnd; BYTE* bucketOffsets; /* Next position in bucket to insert entry */ - U64 hashPower; /* Used to compute the rolling hash. - * Depends on ldmParams.minMatchLength */ + size_t splitIndices[LDM_BATCH_SIZE]; + ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; } ldmState_t; typedef struct { @@ -270,6 +280,7 @@ struct ZSTD_CCtx_s { ZSTD_CCtx_params requestedParams; ZSTD_CCtx_params appliedParams; U32 dictID; + size_t dictContentSize; ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ size_t blockSize; @@ -321,6 +332,11 @@ struct ZSTD_CCtx_s { #ifdef ZSTD_MULTITHREAD ZSTDMT_CCtx* mtctx; #endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; @@ -1200,4 +1216,9 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe * condition for correct operation : hashLog > 1 */ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); +/** ZSTD_CCtx_trace() : + * Trace the end of a compression call. 
+ */ +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); + #endif /* ZSTD_COMPRESS_H */ diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c index 6dd1c1447a9..1d9188d330f 100644 --- a/lib/compress/zstd_compress_literals.c +++ b/lib/compress/zstd_compress_literals.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -15,7 +15,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); @@ -42,7 +42,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ diff --git a/lib/compress/zstd_compress_literals.h b/lib/compress/zstd_compress_literals.h index 8b087057432..c8ebe2fc4e7 100644 --- a/lib/compress/zstd_compress_literals.h +++ b/lib/compress/zstd_compress_literals.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c index be30c08c6b8..4bccf015259 100644 --- a/lib/compress/zstd_compress_sequences.c +++ b/lib/compress/zstd_compress_sequences.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_compress_sequences.h b/lib/compress/zstd_compress_sequences.h index 68c6f9a5acd..3629b3f317c 100644 --- a/lib/compress/zstd_compress_sequences.h +++ b/lib/compress/zstd_compress_sequences.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index e23e619eef1..6a7b02342fd 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -304,7 +304,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, * before we know the table size + compressed size, so we have a bound on the * table size. If we guessed incorrectly, we fall back to uncompressed literals. * - * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded + * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded * in writing the header, otherwise it is set to 0. * * hufMetadata->hType has literals block type info. 
diff --git a/lib/compress/zstd_compress_superblock.h b/lib/compress/zstd_compress_superblock.h index 07f4cb1dc64..8138afa4c04 100644 --- a/lib/compress/zstd_compress_superblock.h +++ b/lib/compress/zstd_compress_superblock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h index d65170b39ca..daec8bd4d84 100644 --- a/lib/compress/zstd_cwksp.h +++ b/lib/compress/zstd_cwksp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -503,7 +503,7 @@ MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { /** * Moves the management of a workspace from one cwksp to another. The src cwksp - * is left in an invalid state (src must be re-init()'ed before its used again). + * is left in an invalid state (src must be re-init()'ed before it's used again). */ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { *dst = *src; diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index ef12a524f7c..9ed8fa6ef6f 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h index 14d944d69bc..b17cf3ee519 100644 --- a/lib/compress/zstd_double_fast.h +++ b/lib/compress/zstd_double_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index db7ce83d0ad..8f8dfcd7a77 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -242,7 +242,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( assert(endIndex - prefixStartIndex <= maxDistance); (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ - /* ensure there will be no no underflow + /* ensure there will be no underflow * when translating a dict index into a local index */ assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h index cf6aaa8e675..1a5fd33a07a 100644 --- a/lib/compress/zstd_fast.h +++ b/lib/compress/zstd_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 49ec1b09eff..3d35ee430e0 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index d0214d5e739..87a397196f1 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 3f3d7c46ab0..fb4b8a0fd0c 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -11,13 +11,99 @@ #include "zstd_ldm.h" #include "../common/debug.h" +#include "../common/xxhash.h" #include "zstd_fast.h" /* ZSTD_fillHashTable() */ #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ +#include "zstd_ldm_geartab.h" #define LDM_BUCKET_SIZE_LOG 3 #define LDM_MIN_MATCH_LENGTH 64 #define LDM_HASH_RLOG 7 -#define LDM_HASH_CHAR_OFFSET 10 + +typedef struct { + U64 rolling; + U64 stopMask; +} ldmRollingHashState_t; + +/** ZSTD_ldm_gear_init(): + * + * Initializes the rolling hash state such that it will honor the + * settings in params. */ +static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) +{ + unsigned maxBitsInMask = MIN(params->minMatchLength, 64); + unsigned hashRateLog = params->hashRateLog; + + state->rolling = ~(U32)0; + + /* The choice of the splitting criterion is subject to two conditions: + * 1. it has to trigger on average every 2^(hashRateLog) bytes; + * 2. ideally, it has to depend on a window of minMatchLength bytes. 
+ * + * In the gear hash algorithm, bit n depends on the last n bytes; + * so in order to obtain a good quality splitting criterion it is + * preferable to use bits with high weight. + * + * To match condition 1 we use a mask with hashRateLog bits set + * and, because of the previous remark, we make sure these bits + * have the highest possible weight while still respecting + * condition 2. + */ + if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { + state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); + } else { + /* In this degenerate case we simply honor the hash rate. */ + state->stopMask = ((U64)1 << hashRateLog) - 1; + } +} + +/** ZSTD_ldm_gear_feed(): + * + * Registers in the splits array all the split points found in the first + * size bytes following the data pointer. This function terminates when + * either all the data has been processed or LDM_BATCH_SIZE splits are + * present in the splits array. + * + * Precondition: The splits array must not be full. + * Returns: The number of bytes processed. */ +static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, + BYTE const* data, size_t size, + size_t* splits, unsigned* numSplits) +{ + size_t n; + U64 hash, mask; + + hash = state->rolling; + mask = state->stopMask; + n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + if (UNLIKELY((hash & mask) == 0)) { \ + splits[*numSplits] = n; \ + *numSplits += 1; \ + if (*numSplits == LDM_BATCH_SIZE) \ + goto done; \ + } \ + } while (0) + + while (n + 3 < size) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < size) { + GEAR_ITER_ONCE(); + } + +#undef GEAR_ITER_ONCE + +done: + state->rolling = hash; + return n; +} void ZSTD_ldm_adjustParameters(ldmParams_t* params, ZSTD_compressionParameters const* cParams) @@ -54,41 +140,6 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) return params.enableLdm ? 
(maxChunkSize / params.minMatchLength) : 0; } -/** ZSTD_ldm_getSmallHash() : - * numBits should be <= 32 - * If numBits==0, returns 0. - * @return : the most significant numBits of value. */ -static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) -{ - assert(numBits <= 32); - return numBits == 0 ? 0 : (U32)(value >> (64 - numBits)); -} - -/** ZSTD_ldm_getChecksum() : - * numBitsToDiscard should be <= 32 - * @return : the next most significant 32 bits after numBitsToDiscard */ -static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) -{ - assert(numBitsToDiscard <= 32); - return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; -} - -/** ZSTD_ldm_getTag() ; - * Given the hash, returns the most significant numTagBits bits - * after (32 + hbits) bits. - * - * If there are not enough bits remaining, return the last - * numTagBits bits. */ -static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) -{ - assert(numTagBits < 32 && hbits <= 32); - if (32 - hbits < numTagBits) { - return hash & (((U32)1 << numTagBits) - 1); - } else { - return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); - } -} - /** ZSTD_ldm_getBucket() : * Returns a pointer to the start of the bucket associated with hash. 
*/ static ldmEntry_t* ZSTD_ldm_getBucket( @@ -103,38 +154,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, size_t const hash, const ldmEntry_t entry, ldmParams_t const ldmParams) { - BYTE* const bucketOffsets = ldmState->bucketOffsets; - *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; - bucketOffsets[hash]++; - bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; -} + BYTE* const pOffset = ldmState->bucketOffsets + hash; + unsigned const offset = *pOffset; + + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; + *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); -/** ZSTD_ldm_makeEntryAndInsertByTag() : - * - * Gets the small hash, checksum, and tag from the rollingHash. - * - * If the tag matches (1 << ldmParams.hashRateLog)-1, then - * creates an ldmEntry from the offset, and inserts it into the hash table. - * - * hBits is the length of the small hash, which is the most significant hBits - * of rollingHash. The checksum is the next 32 most significant bits, followed - * by ldmParams.hashRateLog bits that make up the tag. */ -static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, - U64 const rollingHash, - U32 const hBits, - U32 const offset, - ldmParams_t const ldmParams) -{ - U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); - U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; - if (tag == tagMask) { - U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); - ldmEntry_t entry; - entry.offset = offset; - entry.checksum = checksum; - ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); - } } /** ZSTD_ldm_countBackwardsMatch() : @@ -212,43 +237,42 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, return 0; } -/** ZSTD_ldm_fillLdmHashTable() : - * - * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). 
- * lastHash is the rolling hash that corresponds to lastHashed. - * - * Returns the rolling hash corresponding to position iend-1. */ -static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, - U64 lastHash, const BYTE* lastHashed, - const BYTE* iend, const BYTE* base, - U32 hBits, ldmParams_t const ldmParams) -{ - U64 rollingHash = lastHash; - const BYTE* cur = lastHashed + 1; - - while (cur < iend) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1], - cur[ldmParams.minMatchLength-1], - state->hashPower); - ZSTD_ldm_makeEntryAndInsertByTag(state, - rollingHash, hBits, - (U32)(cur - base), ldmParams); - ++cur; - } - return rollingHash; -} - void ZSTD_ldm_fillHashTable( - ldmState_t* state, const BYTE* ip, + ldmState_t* ldmState, const BYTE* ip, const BYTE* iend, ldmParams_t const* params) { + U32 const minMatchLength = params->minMatchLength; + U32 const hBits = params->hashLog - params->bucketSizeLog; + BYTE const* const base = ldmState->window.base; + BYTE const* const istart = ip; + ldmRollingHashState_t hashState; + size_t* const splits = ldmState->splitIndices; + unsigned numSplits; + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); - if ((size_t)(iend - ip) >= params->minMatchLength) { - U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); - ZSTD_ldm_fillLdmHashTable( - state, startingHash, ip, iend - params->minMatchLength, state->window.base, - params->hashLog - params->bucketSizeLog, - *params); + + ZSTD_ldm_gear_init(&hashState, params); + while (ip < iend) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + if (ip + splits[n] >= istart + minMatchLength) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + ldmEntry_t entry; + + entry.offset = (U32)(split - base); + entry.checksum = (U32)(xxhash 
>> 32); + ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); + } + } + + ip += hashed; } } @@ -274,11 +298,8 @@ static size_t ZSTD_ldm_generateSequences_internal( /* LDM parameters */ int const extDict = ZSTD_window_hasExtDict(ldmState->window); U32 const minMatchLength = params->minMatchLength; - U64 const hashPower = ldmState->hashPower; + U32 const entsPerBucket = 1U << params->bucketSizeLog; U32 const hBits = params->hashLog - params->bucketSizeLog; - U32 const ldmBucketSize = 1U << params->bucketSizeLog; - U32 const hashRateLog = params->hashRateLog; - U32 const ldmTagMask = (1U << params->hashRateLog) - 1; /* Prefix and extDict parameters */ U32 const dictLimit = ldmState->window.dictLimit; U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; @@ -290,45 +311,76 @@ static size_t ZSTD_ldm_generateSequences_internal( /* Input bounds */ BYTE const* const istart = (BYTE const*)src; BYTE const* const iend = istart + srcSize; - BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); + BYTE const* const ilimit = iend - HASH_READ_SIZE; /* Input positions */ BYTE const* anchor = istart; BYTE const* ip = istart; - /* Rolling hash */ - BYTE const* lastHashed = NULL; - U64 rollingHash = 0; - - while (ip <= ilimit) { - size_t mLength; - U32 const curr = (U32)(ip - base); - size_t forwardMatchLength = 0, backwardMatchLength = 0; - ldmEntry_t* bestEntry = NULL; - if (ip != istart) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], - lastHashed[minMatchLength], - hashPower); - } else { - rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); + /* Rolling hash state */ + ldmRollingHashState_t hashState; + /* Arrays for staged-processing */ + size_t* const splits = ldmState->splitIndices; + ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; + unsigned numSplits; + + if (srcSize < minMatchLength) + return iend - anchor; + + /* Initialize the rolling hash state with the first minMatchLength bytes */ + 
ZSTD_ldm_gear_init(&hashState, params); + { + size_t n = 0; + + while (n < minMatchLength) { + numSplits = 0; + n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, + splits, &numSplits); } - lastHashed = ip; + ip += minMatchLength; + } - /* Do not insert and do not look for a match */ - if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { - ip++; - continue; + while (ip < ilimit) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, + splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + + candidates[n].split = split; + candidates[n].hash = hash; + candidates[n].checksum = (U32)(xxhash >> 32); + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); + PREFETCH_L1(candidates[n].bucket); } - /* Get the best entry and compute the match lengths */ - { - ldmEntry_t* const bucket = - ZSTD_ldm_getBucket(ldmState, - ZSTD_ldm_getSmallHash(rollingHash, hBits), - *params); - ldmEntry_t* cur; - size_t bestMatchLength = 0; - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); - - for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + for (n = 0; n < numSplits; n++) { + size_t forwardMatchLength = 0, backwardMatchLength = 0, + bestMatchLength = 0, mLength; + BYTE const* const split = candidates[n].split; + U32 const checksum = candidates[n].checksum; + U32 const hash = candidates[n].hash; + ldmEntry_t* const bucket = candidates[n].bucket; + ldmEntry_t const* cur; + ldmEntry_t const* bestEntry = NULL; + ldmEntry_t newEntry; + + newEntry.offset = (U32)(split - base); + newEntry.checksum = checksum; + + /* If a split point would generate a sequence overlapping with + * the previous one, we merely register it in the hash table and + * move on */ + if (split < anchor) { + 
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + for (cur = bucket; cur < bucket + entsPerBucket; cur++) { size_t curForwardMatchLength, curBackwardMatchLength, curTotalMatchLength; if (cur->checksum != checksum || cur->offset <= lowestIndex) { @@ -342,31 +394,23 @@ static size_t ZSTD_ldm_generateSequences_internal( cur->offset < dictLimit ? dictEnd : iend; BYTE const* const lowMatchPtr = cur->offset < dictLimit ? dictStart : lowPrefixPtr; - - curForwardMatchLength = ZSTD_count_2segments( - ip, pMatch, iend, - matchEnd, lowPrefixPtr); + curForwardMatchLength = + ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); if (curForwardMatchLength < minMatchLength) { continue; } - curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor, - pMatch, lowMatchPtr, - dictStart, dictEnd); - curTotalMatchLength = curForwardMatchLength + - curBackwardMatchLength; + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( + split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); } else { /* !extDict */ BYTE const* const pMatch = base + cur->offset; - curForwardMatchLength = ZSTD_count(ip, pMatch, iend); + curForwardMatchLength = ZSTD_count(split, pMatch, iend); if (curForwardMatchLength < minMatchLength) { continue; } curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowPrefixPtr); - curTotalMatchLength = curForwardMatchLength + - curBackwardMatchLength; + ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); } + curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; if (curTotalMatchLength > bestMatchLength) { bestMatchLength = curTotalMatchLength; @@ -375,57 +419,39 @@ static size_t ZSTD_ldm_generateSequences_internal( bestEntry = cur; } } - } - /* No match found -- continue searching */ - if (bestEntry == NULL) { - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, - hBits, curr, - *params); - ip++; - continue; - } - - /* Match found */ - mLength = 
forwardMatchLength + backwardMatchLength; - ip -= backwardMatchLength; - - { - /* Store the sequence: - * ip = curr - backwardMatchLength - * The match is at (bestEntry->offset - backwardMatchLength) - */ - U32 const matchIndex = bestEntry->offset; - U32 const offset = curr - matchIndex; - rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; - - /* Out of sequence storage */ - if (rawSeqStore->size == rawSeqStore->capacity) - return ERROR(dstSize_tooSmall); - seq->litLength = (U32)(ip - anchor); - seq->matchLength = (U32)mLength; - seq->offset = offset; - rawSeqStore->size++; - } + /* No match found -- insert an entry into the hash table + * and process the next candidate match */ + if (bestEntry == NULL) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } - /* Insert the current entry into the hash table */ - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, - (U32)(lastHashed - base), - *params); + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + { + U32 const offset = (U32)(split - base) - bestEntry->offset; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(split - backwardMatchLength - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } - assert(ip + backwardMatchLength == lastHashed); + /* Insert the current entry into the hash table --- it must be + * done after the previous block to avoid clobbering bestEntry */ + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); - /* Fill the hash table from lastHashed+1 to ip+mLength*/ - /* Heuristic: don't need to fill the entire table at end of block */ - if (ip + mLength <= ilimit) { - rollingHash = ZSTD_ldm_fillLdmHashTable( - ldmState, rollingHash, lastHashed, - ip + mLength, base, hBits, *params); - lastHashed = ip + mLength - 1; + anchor = split + 
forwardMatchLength; } - ip += mLength; - anchor = ip; + + ip += hashed; } + return iend - anchor; } @@ -620,7 +646,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, assert(rawSeqStore->pos <= rawSeqStore->size); assert(rawSeqStore->size <= rawSeqStore->capacity); - /* Loop through each sequence and apply the block compressor to the lits */ + /* Loop through each sequence and apply the block compressor to the literals */ while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { /* maybeSplitSequence updates rawSeqStore->pos */ rawSeq const sequence = maybeSplitSequence(rawSeqStore, diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 6561024e4c9..e9137f0630e 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -73,7 +73,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, * * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. * Avoids emitting matches less than `minMatch` bytes. - * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). + * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). */ void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch); diff --git a/lib/compress/zstd_ldm_geartab.h b/lib/compress/zstd_ldm_geartab.h new file mode 100644 index 00000000000..d24c1f6d394 --- /dev/null +++ b/lib/compress/zstd_ldm_geartab.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_GEARTAB_H +#define ZSTD_LDM_GEARTAB_H + +static U64 ZSTD_ldm_gearTab[256] = { + 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, + 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, + 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, + 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, + 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, + 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, + 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, + 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, + 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, + 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, + 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, + 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, + 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, + 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, + 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, + 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, + 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, + 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, + 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, + 0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553, + 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, + 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, + 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, + 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, + 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, + 0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3, + 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, + 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, + 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, + 0x8b982af1c262f05d, 
0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a, + 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, + 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, + 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, + 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, + 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, + 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, + 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, + 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, + 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, + 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, + 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, + 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, + 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, + 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, + 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, + 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, + 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, + 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, + 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, + 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, + 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, + 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, + 0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, + 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, + 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, + 0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9, + 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, + 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, + 0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, + 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, + 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, + 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 
0x76bb5e2f2c8ad595, + 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, + 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, + 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, + 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, + 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, + 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, + 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, + 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, + 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, + 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, + 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, + 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, + 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, + 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, + 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, + 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, + 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, + 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, + 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, + 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, + 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, + 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, + 0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b, + 0x2b4da14f2613d8f4 +}; + +#endif /* ZSTD_LDM_GEARTAB_H */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index e55c459debd..6ec368b8515 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 9aba8a9018c..bf31cccf5ca 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 50454a50b9b..e28e8bb3b54 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -472,8 +472,6 @@ ZSTDMT_serialState_reset(serialState_t* serialState, ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); assert(params.ldmParams.hashRateLog < 32); - serialState->ldmState.hashPower = - ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); } else { ZSTD_memset(¶ms.ldmParams, 0, sizeof(params.ldmParams)); } @@ -486,10 +484,10 @@ ZSTDMT_serialState_reset(serialState_t* serialState, size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t); unsigned const bucketLog = params.ldmParams.hashLog - params.ldmParams.bucketSizeLog; - size_t const bucketSize = (size_t)1 << bucketLog; unsigned const prevBucketLog = serialState->params.ldmParams.hashLog - serialState->params.ldmParams.bucketSizeLog; + size_t const numBuckets = (size_t)1 << bucketLog; /* Size the seq pool tables */ ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); /* Reset the window */ @@ -501,13 +499,13 @@ ZSTDMT_serialState_reset(serialState_t* serialState, } if 
(serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) { ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); - serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem); + serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem); } if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets) return 1; /* Zero the tables */ ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize); - ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize); + ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets); /* Update window state and fill hash table with dict */ serialState->ldmState.loadedDictEnd = 0; @@ -683,6 +681,8 @@ static void ZSTDMT_compressionJob(void* jobDescription) if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; /* Don't run LDM for the chunks, since we handle it externally */ jobParams.ldmParams.enableLdm = 0; + /* Correct nbWorkers to 0. */ + jobParams.nbWorkers = 0; /* init */ @@ -750,6 +750,7 @@ static void ZSTDMT_compressionJob(void* jobDescription) if (ZSTD_isError(cSize)) JOB_ERROR(cSize); lastCBlockSize = cSize; } } + ZSTD_CCtx_trace(cctx, 0); _endJob: ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize); diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 0a9e551c99b..c69031b2127 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 14182067185..7699c920374 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -1,7 +1,7 @@ /* ****************************************************************** * huff0 huffman decoder, * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c index f5cc23b387c..443fe6b48bf 100644 --- a/lib/decompress/zstd_ddict.c +++ b/lib/decompress/zstd_ddict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/decompress/zstd_ddict.h b/lib/decompress/zstd_ddict.h index 8906a71c940..2f17445728f 100644 --- a/lib/decompress/zstd_ddict.h +++ b/lib/decompress/zstd_ddict.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 21f846bc77e..15139501bea 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -58,10 +58,12 @@ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/cpu.h" /* bmi2 */ #include "../common/mem.h" /* low level memory routines */ +#include "../common/zstd_trace.h" #define FSE_STATIC_LINKING_ONLY #include "../common/fse.h" #define HUF_STATIC_LINKING_ONLY #include "../common/huf.h" +#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */ #include "../common/zstd_internal.h" /* blockProperties_t */ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ @@ -72,6 +74,144 @@ #endif + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. + */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. 
+ */
+static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
+    const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    while (hashSet->ddictPtrTable[idx] != NULL) {
+        /* Replace existing ddict if inserting ddict with same dictID */
+        if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
+            DEBUGLOG(4, "DictID already exists, replacing rather than adding");
+            hashSet->ddictPtrTable[idx] = ddict;
+            return 0;
+        }
+        /* Linear probing: advance first, then wrap with the mask.
+         * Masking before the increment would let idx reach ddictPtrTableSize
+         * (one slot past the end of the table) and would never probe slot 0. */
+        idx = (idx + 1) & idxRangeMask;
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    hashSet->ddictPtrTable[idx] = ddict;
+    hashSet->ddictPtrCount++;
+    return 0;
+}
+
+/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and
+ * rehashes all values, allocates new table, frees old table.
+ * Returns 0 on success, otherwise a zstd error code.
+ */
+static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
+    const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
+    const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
+    size_t oldTableSize = hashSet->ddictPtrTableSize;
+    size_t i;
+
+    DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
+    RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
+    hashSet->ddictPtrTable = newTable;
+    hashSet->ddictPtrTableSize = newTableSize;
+    hashSet->ddictPtrCount = 0;
+    for (i = 0; i < oldTableSize; ++i) {
+        if (oldTable[i] != NULL) {
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
+        }
+    }
+    ZSTD_customFree((void*)oldTable, customMem);
+    DEBUGLOG(4, "Finished re-hash");
+    return 0;
+}
+
+/* Fetches a DDict with the given dictID
+ * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
+ */
+static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    for (;;) {
+        size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
+        if (currDictID == dictID || currDictID == 0) {
+            /* currDictID == 0 implies a NULL ddict entry */
+            break;
+        } else {
+            /* Advance, then wrap to the start of the table when we reach the end.
+             * Masking before the increment would read ddictPtrTable[ddictPtrTableSize]. */
+            idx = (idx + 1) & idxRangeMask;
+        }
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    return hashSet->ddictPtrTable[idx];
+}
+
+/* Allocates space for and returns a ddict hash set
+ * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
+ * Returns NULL if allocation failed.
+ */
+static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
+    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+    DEBUGLOG(4, "Allocating new hash set");
+    /* The set itself must be checked before any of its fields are written */
+    if (!ret) {
+        return NULL;
+    }
+    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
+    if (!ret->ddictPtrTable) {
+        /* Table allocation failed: release the set so it is not leaked */
+        ZSTD_customFree(ret, customMem);
+        return NULL;
+    }
+    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
+    ret->ddictPtrCount = 0;
+    return ret;
+}
+
+/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
+ * Note: The ZSTD_DDict* within the table are NOT freed.
+ */
+static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Freeing ddict hash set");
+    if (hashSet && hashSet->ddictPtrTable) {
+        ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
+    }
+    if (hashSet) {
+        ZSTD_customFree(hashSet, customMem);
+    }
+}
+
+/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
+ * Returns 0 on success, or a ZSTD error.
+ */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + /*-************************************************************* * Context management ***************************************************************/ @@ -101,6 +241,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; dctx->outBufferMode = ZSTD_bm_buffered; dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; } static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) @@ -120,8 +261,8 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->noForwardProgress = 0; dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; ZSTD_DCtx_resetParameters(dctx); - dctx->validateChecksum = 1; #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION dctx->dictContentEndForFuzzing = NULL; #endif @@ -178,6 +319,10 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) if (dctx->legacyContext) ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); #endif + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } ZSTD_customFree(dctx, cMem); return 0; } @@ -190,6 +335,29 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ } +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based 
on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + /*-************************************************************* * Frame header decoding @@ -441,12 +609,19 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) /** ZSTD_decodeFrameHeader() : * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) { size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); if (ZSTD_isError(result)) return result; /* invalid header */ RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION /* Skip the dictID check in fuzzing mode, because it makes the search * harder. 
@@ -456,6 +631,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he #endif dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; return 0; } @@ -578,7 +754,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); - ZSTD_checkContinuity(dctx, blockStart); + ZSTD_checkContinuity(dctx, blockStart, blockSize); dctx->previousDstEnd = (const char*)blockStart + blockSize; return blockSize; } @@ -610,6 +786,32 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, return regenSize; } +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->traceCtx) { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + /*! 
ZSTD_decompressFrame() : * @dctx must be properly initialized @@ -619,8 +821,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void** srcPtr, size_t *srcSizePtr) { - const BYTE* ip = (const BYTE*)(*srcPtr); - BYTE* const ostart = (BYTE* const)dst; + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; BYTE* op = ostart; size_t remainingSrcSize = *srcSizePtr; @@ -695,7 +898,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += 4; remainingSrcSize -= 4; } - + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); /* Allow caller to get size read */ *srcPtr = ip; *srcSizePtr = remainingSrcSize; @@ -764,7 +967,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, * use this in all cases but ddict */ FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); } - ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); @@ -899,7 +1102,9 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); /* Sanity check */ RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); - if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; switch (dctx->stage) { @@ -1004,6 +1209,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c dctx->expected = 4; dctx->stage = ZSTDds_checkChecksum; } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; /* ends here */ dctx->stage = ZSTDds_getFrameHeaderSize; } @@ -1023,6 +1229,7 @@ size_t 
ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; @@ -1176,8 +1383,12 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = ZSTD_trace_decompress_begin(dctx); +#endif dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; dctx->decodedSize = 0; dctx->previousDstEnd = NULL; dctx->prefixStart = NULL; @@ -1391,6 +1602,16 @@ size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) if (ddict) { dctx->ddict = ddict; dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } } return 0; } @@ -1436,6 +1657,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.lowerBound = (int)ZSTD_d_validateChecksum; bounds.upperBound = (int)ZSTD_d_ignoreChecksum; return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; default:; } bounds.error = ERROR(parameter_unsupported); @@ -1473,6 +1698,9 @@ size_t 
ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value case ZSTD_d_forceIgnoreChecksum: *value = (int)dctx->forceIgnoreChecksum; return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; default:; } RETURN_ERROR(parameter_unsupported, ""); @@ -1499,6 +1727,13 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; default:; } RETURN_ERROR(parameter_unsupported, ""); @@ -1680,6 +1915,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } } #endif { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } DEBUGLOG(5, "header size : %u", (U32)hSize); if (ZSTD_isError(hSize)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 19cbdc5c16e..b71bc20d65b 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -577,7 +577,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize) { - const BYTE* const istart = (const BYTE* const)src; + const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; int nbSeq; @@ -1108,7 +1108,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -1242,7 +1242,7 @@ ZSTD_decompressSequencesLong_body( { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -1517,9 +1517,9 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, } -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) { - if (dst != dctx->previousDstEnd) { /* not contiguous */ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ dctx->dictEnd = dctx->previousDstEnd; dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); dctx->prefixStart = dst; @@ -1533,7 +1533,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) { size_t dSize; - ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); dctx->previousDstEnd = (char*)dst + dSize; return dSize; diff --git 
a/lib/decompress/zstd_decompress_block.h b/lib/decompress/zstd_decompress_block.h index b5715c168e2..891cf98350a 100644 --- a/lib/decompress/zstd_decompress_block.h +++ b/lib/decompress/zstd_decompress_block.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index f80b471e994..3fcec6c5667 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -21,6 +21,7 @@ *********************************************************/ #include "../common/mem.h" /* BYTE, U16, U32 */ #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */ +#include "../common/zstd_trace.h" /* ZSTD_TraceCtx */ @@ -99,6 +100,13 @@ typedef enum { ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ } ZSTD_dictUses_e; +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; + struct ZSTD_DCtx_s { const ZSTD_seqSymbol* LLTptr; @@ -113,6 +121,7 @@ struct ZSTD_DCtx_s const void* dictEnd; /* end of previous segment */ size_t expected; ZSTD_frameHeader fParams; + U64 processedCSize; U64 decodedSize; blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ ZSTD_dStage stage; @@ -136,6 +145,8 @@ struct ZSTD_DCtx_s U32 dictID; int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and 
presumed "cold" (not in cpu cache) */ ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ /* streaming */ ZSTD_dStreamStage streamStage; @@ -166,6 +177,11 @@ struct ZSTD_DCtx_s void const* dictContentBeginForFuzzing; void const* dictContentEndForFuzzing; #endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ @@ -184,7 +200,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, * If yes, do nothing (continue on current segment). * If not, classify previous segment as "external dictionary", and start a new segment. * This function cannot fail. */ -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); #endif /* ZSTD_DECOMPRESS_INTERNAL_H */ diff --git a/lib/deprecated/zbuff.h b/lib/deprecated/zbuff.h index 03cb14a039d..ed98b46006e 100644 --- a/lib/deprecated/zbuff.h +++ b/lib/deprecated/zbuff.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/deprecated/zbuff_common.c b/lib/deprecated/zbuff_common.c index 579bc4df14a..cb370cbc52a 100644 --- a/lib/deprecated/zbuff_common.c +++ b/lib/deprecated/zbuff_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/deprecated/zbuff_compress.c b/lib/deprecated/zbuff_compress.c index 2d20b137759..972afd8738f 100644 --- a/lib/deprecated/zbuff_compress.c +++ b/lib/deprecated/zbuff_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/deprecated/zbuff_decompress.c b/lib/deprecated/zbuff_decompress.c index d3c49e84b81..baf18294e3a 100644 --- a/lib/deprecated/zbuff_decompress.c +++ b/lib/deprecated/zbuff_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index c78af133a9d..f069b6378bc 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -1062,18 +1062,19 @@ typedef struct COVER_tryParameters_data_s { * This function is thread safe if zstd is compiled with multithreaded support. * It takes its parameters as an *OWNING* opaque pointer to support threading. 
*/ -static void COVER_tryParameters(void *opaque) { +static void COVER_tryParameters(void *opaque) +{ /* Save parameters as local variables */ - COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque; + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque; const COVER_ctx_t *const ctx = data->ctx; const ZDICT_cover_params_t parameters = data->parameters; size_t dictBufferCapacity = data->dictBufferCapacity; size_t totalCompressedSize = ERROR(GENERIC); /* Allocate space for hash table, dict, and freqs */ COVER_map_t activeDmers; - BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); + BYTE* const dict = (BYTE*)malloc(dictBufferCapacity); COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); - U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32)); if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); goto _cleanup; @@ -1103,15 +1104,14 @@ static void COVER_tryParameters(void *opaque) { free(data); COVER_map_destroy(&activeDmers); COVER_dictSelectionFree(selection); - if (freqs) { - free(freqs); - } + free(freqs); } ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( - void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, - ZDICT_cover_params_t *parameters) { + void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters) +{ /* constants */ const unsigned nbThreads = parameters->nbThreads; const double splitPoint = diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h index 9f1cb5fb955..7bbeaac15e2 100644 --- a/lib/dictBuilder/cover.h +++ b/lib/dictBuilder/cover.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, Facebook, Inc. + * Copyright (c) 2017-2021, Facebook, Inc. 
* All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/dictBuilder/divsufsort.c b/lib/dictBuilder/divsufsort.c index ead9220442b..a2870fb3ba3 100644 --- a/lib/dictBuilder/divsufsort.c +++ b/lib/dictBuilder/divsufsort.c @@ -1576,7 +1576,7 @@ sort_typeBstar(const unsigned char *T, int *SA, /* Construct the inverse suffix array of type B* suffixes using trsort. */ trsort(ISAb, SA, m, 1); - /* Set the sorted order of tyoe B* suffixes. */ + /* Set the sorted order of type B* suffixes. */ for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } if(0 <= i) { diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c index 5e60f24c587..5f880e4e412 100644 --- a/lib/dictBuilder/fastcover.c +++ b/lib/dictBuilder/fastcover.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, Facebook, Inc. + * Copyright (c) 2018-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -462,20 +462,20 @@ typedef struct FASTCOVER_tryParameters_data_s { * This function is thread safe if zstd is compiled with multithreaded support. * It takes its parameters as an *OWNING* opaque pointer to support threading. 
*/ -static void FASTCOVER_tryParameters(void *opaque) +static void FASTCOVER_tryParameters(void* opaque) { /* Save parameters as local variables */ - FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque; + FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque; const FASTCOVER_ctx_t *const ctx = data->ctx; const ZDICT_cover_params_t parameters = data->parameters; size_t dictBufferCapacity = data->dictBufferCapacity; size_t totalCompressedSize = ERROR(GENERIC); /* Initialize array to keep track of frequency of dmer within activeSegment */ - U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16)); + U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16)); /* Allocate space for hash table, dict, and freqs */ - BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); + BYTE *const dict = (BYTE*)malloc(dictBufferCapacity); COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); - U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); + U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); if (!segmentFreqs || !dict || !freqs) { DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); goto _cleanup; diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 79c522ef367..4df5a94a269 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -23,9 +23,13 @@ /* Unix Large Files support (>4GB) */ #define _FILE_OFFSET_BITS 64 #if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */ +# ifndef _LARGEFILE_SOURCE # define _LARGEFILE_SOURCE +# endif #elif ! 
defined(__LP64__) /* No point defining Large file for 64 bit */ +# ifndef _LARGEFILE64_SOURCE # define _LARGEFILE64_SOURCE +# endif #endif @@ -967,16 +971,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced( return MIN(dictBufferCapacity, hSize+dictContentSize); } -/* Hidden declaration for dbio.c */ -size_t ZDICT_trainFromBuffer_unsafe_legacy( - void* dictBuffer, size_t maxDictSize, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_legacy_params_t params); /*! ZDICT_trainFromBuffer_unsafe_legacy() : -* Warning : `samplesBuffer` must be followed by noisy guard band. +* Warning : `samplesBuffer` must be followed by noisy guard band !!! * @return : size of dictionary, or an error code which can be tested with ZDICT_isError() */ -size_t ZDICT_trainFromBuffer_unsafe_legacy( +static size_t ZDICT_trainFromBuffer_unsafe_legacy( void* dictBuffer, size_t maxDictSize, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t params) diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index b782993f9d2..190ffa52a45 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -264,10 +264,11 @@ typedef struct { * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. 
*/ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( - void *dictBuffer, size_t dictBufferCapacity, - const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters); + /* Deprecation warnings */ /* It is generally possible to disable deprecation warnings from compiler, for example with -Wno-deprecated-declarations for gcc diff --git a/lib/dll/example/Makefile b/lib/dll/example/Makefile index 8f19195595b..a1cc189d5f5 100644 --- a/lib/dll/example/Makefile +++ b/lib/dll/example/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. +# Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h index 6bea6a519ab..3f21d226730 100644 --- a/lib/legacy/zstd_legacy.h +++ b/lib/legacy/zstd_legacy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index 13115bec5f8..e2b93edb760 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v01.h b/lib/legacy/zstd_v01.h index 7910351726c..f85d65de1fa 100644 --- a/lib/legacy/zstd_v01.h +++ b/lib/legacy/zstd_v01.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index 9abb6d03390..0368cb643c4 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v02.h b/lib/legacy/zstd_v02.h index 5f8f6cd60cc..4dc2c7f84b8 100644 --- a/lib/legacy/zstd_v02.h +++ b/lib/legacy/zstd_v02.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c index a19cb205a56..e2a35b4d0f9 100644 --- a/lib/legacy/zstd_v03.c +++ b/lib/legacy/zstd_v03.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v03.h b/lib/legacy/zstd_v03.h index 5fc72730ce9..89fa3b1bdb0 100644 --- a/lib/legacy/zstd_v03.h +++ b/lib/legacy/zstd_v03.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index 77d52555b31..9c73d4a5379 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v04.h b/lib/legacy/zstd_v04.h index 15fce0d487f..8d93215348a 100644 --- a/lib/legacy/zstd_v04.h +++ b/lib/legacy/zstd_v04.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index ca8d5c9bf03..6027a7b6b7d 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -2833,7 +2833,7 @@ static size_t ZSTDv05_decodeFrameHeader_Part2(ZSTDv05_DCtx* zc, const void* src, static size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { - const BYTE* const in = (const BYTE* const)src; + const BYTE* const in = (const BYTE*)src; BYTE headerFlags; U32 cSize; @@ -3002,7 +3002,7 @@ static size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb, const void* src, size_t srcSize, U32 flagStaticTable) { - const BYTE* const istart = (const BYTE* const)src; + const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* const iend = istart + srcSize; U32 LLtype, Offtype, MLtype; @@ -3310,7 +3310,7 @@ static size_t ZSTDv05_decompressSequences( { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; BYTE* const oend = ostart + maxDstSize; size_t errorCode, 
dumpsLength=0; @@ -3423,7 +3423,7 @@ static size_t ZSTDv05_decompress_continueDCtx(ZSTDv05_DCtx* dctx, { const BYTE* ip = (const BYTE*)src; const BYTE* iend = ip + srcSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; BYTE* const oend = ostart + maxDstSize; size_t remainingSize = srcSize; diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h index 167d892e665..074ff1bc019 100644 --- a/lib/legacy/zstd_v05.h +++ b/lib/legacy/zstd_v05.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index c4ac7dba8eb..76fc838d9d2 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -3029,7 +3029,7 @@ typedef struct * Provides the size of compressed block from block header `src` */ static size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { - const BYTE* const in = (const BYTE* const)src; + const BYTE* const in = (const BYTE*)src; U32 cSize; if (srcSize < ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong); @@ -3223,7 +3223,7 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr, FSEv06_DTable* DTableLL, FSEv06_DTable* DTableML, FSEv06_DTable* DTableOffb, U32 flagRepeatTable, const void* src, size_t srcSize) { - const BYTE* const istart = (const BYTE* const)src; + const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; @@ -3445,7 +3445,7 @@ static size_t ZSTDv06_decompressSequences( { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -3561,7 +3561,7 @@ static size_t ZSTDv06_decompressFrame(ZSTDv06_DCtx* dctx, { const BYTE* ip = (const BYTE*)src; const BYTE* const iend = ip + srcSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; BYTE* const oend = ostart + dstCapacity; size_t remainingSize = srcSize; diff --git a/lib/legacy/zstd_v06.h b/lib/legacy/zstd_v06.h index 2fd99e629d4..1fa8f9dc531 100644 --- a/lib/legacy/zstd_v06.h +++ b/lib/legacy/zstd_v06.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index 049ba474940..1239d814ac1 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -3258,7 +3258,7 @@ typedef struct * Provides the size of compressed block from block header `src` */ static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { - const BYTE* const in = (const BYTE* const)src; + const BYTE* const in = (const BYTE*)src; U32 cSize; if (srcSize < ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong); @@ -3453,7 +3453,7 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr, FSEv07_DTable* DTableLL, FSEv07_DTable* DTableML, FSEv07_DTable* DTableOffb, U32 flagRepeatTable, const void* src, size_t srcSize) { - const BYTE* const istart = (const BYTE* const)src; + const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; @@ -3672,7 +3672,7 @@ static size_t ZSTDv07_decompressSequences( { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -3799,7 +3799,7 @@ static size_t ZSTDv07_decompressFrame(ZSTDv07_DCtx* dctx, { const BYTE* ip = (const BYTE*)src; const BYTE* const iend = ip + srcSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; size_t remainingSize = srcSize; diff --git a/lib/legacy/zstd_v07.h b/lib/legacy/zstd_v07.h index 9da50c4e641..3982a04e335 100644 --- a/lib/legacy/zstd_v07.h +++ 
b/lib/legacy/zstd_v07.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/zstd.h b/lib/zstd.h index b0ecdf55385..222339d71a3 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -72,7 +72,7 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 8 +#define ZSTD_VERSION_RELEASE 9 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) /*! ZSTD_versionNumber() : @@ -546,12 +546,14 @@ typedef enum { * ZSTD_d_format * ZSTD_d_stableOutBuffer * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly */ ZSTD_d_experimentalParam1=1000, ZSTD_d_experimentalParam2=1001, - ZSTD_d_experimentalParam3=1002 + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 } ZSTD_dParameter; @@ -948,7 +950,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s * Reference a prepared dictionary, to be used for all next compressed frames. * Note that compression parameters are enforced from within CDict, * and supersede any compression parameter previously set within CCtx. - * The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. 
* The dictionary will remain valid for future compressed frames using same CCtx. * @result : 0, or an error code (which can be tested with ZSTD_isError()). @@ -999,6 +1001,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s /*! ZSTD_DCtx_refDDict() : * Reference a prepared dictionary, to be used to decompress next frames. * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Note 1 : Currently, only one dictionary can be managed. * Referencing a new dictionary effectively "discards" any previous one. @@ -1205,6 +1214,12 @@ typedef enum { ZSTD_d_ignoreChecksum = 1 } ZSTD_forceIgnoreChecksum_e; +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + typedef enum { /* Note: this enum and the behavior it controls are effectively internal * implementation details of the compressor. They are expected to continue @@ -1286,7 +1301,7 @@ ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t * `srcSize` must be the _exact_ size of this series * (i.e. there should be a frame boundary at `src + srcSize`) * @return : - upper-bound for the decompressed size of all data in all successive frames - * - if an error occured: ZSTD_CONTENTSIZE_ERROR + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR * * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. 
* note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. @@ -1372,6 +1387,23 @@ ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size const void* src, size_t srcSize); +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + /*************************************** * Memory management ***************************************/ @@ -1513,6 +1545,7 @@ ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + /* * This API is temporary and is expected to change or disappear in the future! 
*/ @@ -1523,10 +1556,12 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( const ZSTD_CCtx_params* cctxParams, ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + /*************************************** * Advanced compression functions @@ -1802,7 +1837,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * and store it into int* value. * @return : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); /*! ZSTD_CCtx_params : @@ -1857,7 +1892,7 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_c * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); /*! ZSTD_CCtx_setParametersUsingCCtxParams() : * Apply a set of ZSTD_CCtx_params to the compression context. @@ -1983,6 +2018,30 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param */ #define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. 
Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_DCtx_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate DDict is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_DCtx_refDDict() on multiple dict buffers. + * + * Param has values of type ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + /*! ZSTD_DCtx_setFormat() : * Instruct the decoder context about what kind of data to decode next. * This instruction is mandatory to decode data without a fully-formed header, diff --git a/programs/.gitignore b/programs/.gitignore index 662f708399f..2d4edbe45b1 100644 --- a/programs/.gitignore +++ b/programs/.gitignore @@ -8,6 +8,7 @@ zstd-frugal zstd-small zstd-nolegacy zstd-dictBuilder +zstd-dll # Object files *.o diff --git a/programs/Makefile b/programs/Makefile index 8641d0ee48a..936f3c804d4 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. +# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the @@ -61,8 +61,10 @@ DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-prototypes -Wundef -Wpointer-arith \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls -Wmissing-prototypes -Wc++-compat -CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) -FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) +CFLAGS += $(DEBUGFLAGS) +CPPFLAGS += $(MOREFLAGS) +LDFLAGS += $(MOREFLAGS) +FLAGS = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) ZSTDLIB_COMMON := $(ZSTDDIR)/common ZSTDLIB_COMPRESS := $(ZSTDDIR)/compress @@ -88,13 +90,13 @@ endif # Sort files in alphabetical order for reproducible builds ZSTDLIB_FULL_SRC = $(sort $(ZSTDLIB_CORE_SRC) $(ZSTDLEGACY_SRC) $(ZDICT_SRC)) -ZSTDLIB_LOCAL_SRC := $(notdir $(ZSTDLIB_FULL_SRC)) +ZSTDLIB_LOCAL_SRC = $(notdir $(ZSTDLIB_FULL_SRC)) ZSTDLIB_LOCAL_OBJ := $(ZSTDLIB_LOCAL_SRC:.c=.o) ZSTD_CLI_SRC := $(wildcard *.c) ZSTD_CLI_OBJ := $(ZSTD_CLI_SRC:.c=.o) -ZSTD_ALL_SRC := $(ZSTDLIB_LOCAL_SRC) $(ZSTD_CLI_SRC) +ZSTD_ALL_SRC = $(ZSTDLIB_LOCAL_SRC) $(ZSTD_CLI_SRC) ZSTD_ALL_OBJ := $(ZSTD_ALL_SRC:.c=.o) UNAME := $(shell uname) @@ -102,6 +104,8 @@ ifeq ($(UNAME), Darwin) HASH ?= md5 else ifeq ($(UNAME), FreeBSD) HASH ?= gmd5sum +else ifeq ($(UNAME), NetBSD) + HASH ?= md5 -n else ifeq ($(UNAME), OpenBSD) HASH ?= md5 endif @@ -109,7 +113,7 @@ HASH ?= md5sum HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0) ifndef BUILD_DIR -HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " ") +HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " ") ifeq ($(HAVE_HASH),0) $(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags) BUILD_DIR := obj/generic_noconf @@ -192,11 +196,13 @@ endif endif SET_CACHE_DIRECTORY = \ - $(MAKE) --no-print-directory $@ \ + +$(MAKE) --no-print-directory $@ \ 
BUILD_DIR=obj/$(HASH_DIR) \ CPPFLAGS="$(CPPFLAGS)" \ CFLAGS="$(CFLAGS)" \ - LDFLAGS="$(LDFLAGS)" + LDFLAGS="$(LDFLAGS)" \ + LDLIBS="$(LDLIBS)" \ + ZSTD_ALL_SRC="$(ZSTD_ALL_SRC)" .PHONY: all @@ -207,7 +213,8 @@ allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy zstd-di .PHONY: zstd # must always be run zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP) -zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD) +zstd : LDFLAGS += $(THREAD_LD) $(DEBUGFLAGS_LD) +zstd : LDLIBS += $(ZLIBLD) $(LZMALD) $(LZ4LD) zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) ifneq (,$(filter Windows%,$(OS))) zstd : $(RES_FILE) @@ -229,7 +236,7 @@ $(BUILD_DIR)/zstd : $(ZSTD_OBJ) @echo "$(LZMA_MSG)" @echo "$(LZ4_MSG)" @echo LINK $@ - $(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS) + $(CC) $(FLAGS) $^ $(LDLIBS) -o $@$(EXT) ifeq ($(HAVE_HASH),1) SRCBIN_HASH = $(shell cat $(BUILD_DIR)/zstd 2> $(VOID) | $(HASH) | cut -f 1 -d " ") @@ -284,18 +291,12 @@ zstd-noxz : LZMALD := zstd-noxz : LZMA_MSG := - xz/lzma support is disabled zstd-noxz : zstd -## zstd-dll: zstd executable linked to dynamic library libzstd (must already exist) -# note : the following target doesn't link -# because zstd uses non-public symbols from libzstd -# such as XXH64 (for benchmark), -# ZDICT_trainFromBuffer_unsafe_legacy (for dictionary builder) -# and ZSTD_cycleLog (likely for --patch-from). -# It's unclear at this stage if this is a scenario that must be supported +## zstd-dll: zstd executable linked to dynamic library libzstd (must have same version) .PHONY: zstd-dll -zstd-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd -zstd-dll : ZSTDLIB_FULL_SRC = -zstd-dll : $(ZSTD_CLI_OBJ) - $(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS) +zstd-dll : LDFLAGS+= -L$(ZSTDDIR) +zstd-dll : LDLIBS += -lzstd +zstd-dll : ZSTDLIB_LOCAL_SRC = xxhash.c +zstd-dll : zstd ## zstd-pgo: zstd executable optimized with PGO. 
@@ -315,16 +316,16 @@ zstd-pgo : ## zstd-small: minimal target, supporting only zstd compression and decompression. no bench. no legacy. no other format. zstd-small: CFLAGS = -Os -s zstd-frugal zstd-small: $(ZSTDLIB_CORE_SRC) zstdcli.c util.c timefn.c fileio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT) + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOTRACE $^ -o $@$(EXT) zstd-decompress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_DECOMPRESS_C) zstdcli.c util.c timefn.c fileio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT) + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS -DZSTD_NOTRACE $^ -o $@$(EXT) zstd-compress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) zstdcli.c util.c timefn.c fileio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT) + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS -DZSTD_NOTRACE $^ -o $@$(EXT) ## zstd-dictBuilder: executable supporting dictionary creation and compression (only) -zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS +zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS -DZSTD_NOTRACE zstd-dictBuilder: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) $(ZDICT_SRC) zstdcli.c util.c timefn.c fileio.c dibio.c $(CC) $(FLAGS) $^ -o $@$(EXT) @@ -346,9 +347,11 @@ endif .PHONY: clean clean: $(RM) core *.o tmp* result* *.gcda dictionary *.zst \ - zstd$(EXT) zstd32$(EXT) zstd-compress$(EXT) zstd-decompress$(EXT) \ + zstd$(EXT) zstd32$(EXT) zstd-dll$(EXT) \ + zstd-compress$(EXT) zstd-decompress$(EXT) \ zstd-small$(EXT) zstd-frugal$(EXT) zstd-nolegacy$(EXT) zstd4$(EXT) \ - zstd-dictBuilder$(EXT) *.gcda default*.profraw default.profdata have_zlib$(EXT) + zstd-dictBuilder$(EXT) \ + *.gcda default*.profraw default.profdata have_zlib$(EXT) $(RM) -r obj/* @echo Cleaning completed diff --git a/programs/benchfn.c b/programs/benchfn.c index ed7273afb6e..ce39f41d3bb 100644 --- a/programs/benchfn.c +++ b/programs/benchfn.c @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/benchfn.h b/programs/benchfn.h index e555bbe6ae3..8c36831e142 100644 --- a/programs/benchfn.h +++ b/programs/benchfn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/benchzstd.c b/programs/benchzstd.c index 77056203d55..314a3430aae 100644 --- a/programs/benchzstd.c +++ b/programs/benchzstd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/benchzstd.h b/programs/benchzstd.h index 8c55b3c4f29..769268532fc 100644 --- a/programs/benchzstd.h +++ b/programs/benchzstd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/datagen.c b/programs/datagen.c index 4353b7ff994..835e2c027cb 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/datagen.h b/programs/datagen.h index 5a2682d8f9f..97e0c2b06fd 100644 --- a/programs/datagen.h +++ b/programs/datagen.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/dibio.c b/programs/dibio.c index cb3829e3e59..c6d267cea3f 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -255,18 +255,6 @@ static fileStats DiB_fileStats(const char** fileNamesTable, unsigned nbFiles, si } -/*! ZDICT_trainFromBuffer_unsafe_legacy() : - Strictly Internal use only !! - Same as ZDICT_trainFromBuffer_legacy(), but does not control `samplesBuffer`. - `samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads. - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - or an error code. -*/ -size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_legacy_params_t parameters); - - int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams, @@ -319,9 +307,9 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, { size_t dictSize; if (params) { DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH); /* guard band, for end of buffer condition */ - dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize, - srcBuffer, sampleSizes, fs.nbSamples, - *params); + dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, + srcBuffer, sampleSizes, fs.nbSamples, + *params); } else if (coverParams) { if (optimize) { dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, diff --git 
a/programs/dibio.h b/programs/dibio.h index 682723d6a54..98ba9110bf9 100644 --- a/programs/dibio.h +++ b/programs/dibio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/fileio.c b/programs/fileio.c index 65f2d531a81..1f1cbb9e3ab 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -45,7 +45,6 @@ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ #include "../lib/zstd.h" #include "../lib/common/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */ -#include "../lib/compress/zstd_compress_internal.h" #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) # include @@ -77,6 +76,11 @@ /*-************************************* * Macros ***************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) +#undef MAX +#define MAX(a,b) ((a)>(b) ? 
(a) : (b)) struct FIO_display_prefs_s { int displayLevel; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */ @@ -675,14 +679,11 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs, FIO_removeFile(dstFileName); } } - { FILE* const f = fopen( dstFileName, "wb" ); + { const int old_umask = UTIL_umask(0177); /* u-x,go-rwx */ + FILE* const f = fopen( dstFileName, "wb" ); + UTIL_umask(old_umask); if (f == NULL) { DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); - } else if (srcFileName != NULL - && strcmp (srcFileName, stdinmark) - && strcmp(dstFileName, nulmark) ) { - /* reduce rights on newly created dst file while compression is ongoing */ - UTIL_chmod(dstFileName, NULL, 00600); } return f; } @@ -840,7 +841,7 @@ static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs, /* FIO_removeMultiFilesWarning() : * Returns 1 if the console should abort, 0 if console should proceed. * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts. - * + * * If -f is specified, or there is just 1 file, zstd will always proceed as usual. * If --rm is specified, there will be a prompt asking for user confirmation. 
* If -f is specified with --rm, zstd will proceed as usual @@ -897,6 +898,15 @@ typedef struct { ZSTD_CStream* cctx; } cRess_t; +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + assert(hashLog > 1); + return hashLog - btScale; +} + static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs, ZSTD_compressionParameters* comprParams, unsigned long long const dictSize, @@ -983,7 +993,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) ); - CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) ); /* multi-threading */ @@ -1350,7 +1360,7 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, /* display notification; and adapt compression level */ if (READY_FOR_UPDATE()) { ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); - double const cShare = (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100; + double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100; /* display progress notifications */ if (g_display_prefs.displayLevel >= 3) { @@ -1545,7 +1555,7 @@ FIO_compressFilename_internal(FIO_ctx_t* const fCtx, fCtx->totalBytesOutput += (size_t)compressedfilesize; DISPLAYLEVEL(2, "\r%79s\r", ""); if (g_display_prefs.displayLevel >= 2 && - 
!fCtx->hasStdoutOutput && + !fCtx->hasStdoutOutput && (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) { if (readsize == 0) { DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n", @@ -1555,7 +1565,7 @@ FIO_compressFilename_internal(FIO_ctx_t* const fCtx, } else { DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", srcFileName, - (double)compressedfilesize / readsize * 100, + (double)compressedfilesize / (double)readsize * 100, (unsigned long long)readsize, (unsigned long long) compressedfilesize, dstFileName); } @@ -1795,7 +1805,7 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, int status; int error = 0; cRess_t ress = FIO_createCResources(prefs, dictFileName, - FIO_getLargestFileSize(inFileNamesTable, fCtx->nbFilesTotal), + FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal), compressionLevel, comprParams); /* init */ @@ -1821,7 +1831,7 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, } } else { if (outMirroredRootDirName) - UTIL_mirrorSourceFilesDirectories(inFileNamesTable, fCtx->nbFilesTotal, outMirroredRootDirName); + UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName); for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx]; @@ -1845,7 +1855,7 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, } if (outDirName) - FIO_checkFilenameCollisions(inFileNamesTable , fCtx->nbFilesTotal); + FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal); } if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) { @@ -1892,7 +1902,7 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); CHECK( ZSTD_DCtx_setParameter(ress.dctx, 
ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag)); - + ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_DStreamOutSize(); @@ -2099,7 +2109,7 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, if (srcFileLength>20) srcFileName += srcFileLength-20; } - ZSTD_resetDStream(ress->dctx); + ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only); /* Header loading : ensures ZSTD_getFrameHeader() will succeed */ { size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX; @@ -2747,7 +2757,7 @@ FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, strerror(errno)); } else { if (outMirroredRootDirName) - UTIL_mirrorSourceFilesDirectories(srcNamesTable, fCtx->nbFilesTotal, outMirroredRootDirName); + UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName); for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */ const char* const srcFileName = srcNamesTable[fCtx->currFileIdx]; @@ -2769,9 +2779,9 @@ FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, error |= status; } if (outDirName) - FIO_checkFilenameCollisions(srcNamesTable , fCtx->nbFilesTotal); + FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal); } - + if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0) DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput); @@ -2938,7 +2948,7 @@ displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel) double const windowSizeUnit = (double)info->windowSize / unit; double const compressedSizeUnit = (double)info->compressedSize / unit; double const decompressedSizeUnit = (double)info->decompressedSize / unit; - double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize; + double const ratio = (info->compressedSize == 0) ? 
0 : ((double)info->decompressedSize)/(double)info->compressedSize; const char* const checkString = (info->usesCheck ? "XXH64" : "None"); if (displayLevel <= 2) { if (!info->decompUnavailable) { @@ -3059,7 +3069,7 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB"; double const compressedSizeUnit = (double)total.compressedSize / unit; double const decompressedSizeUnit = (double)total.decompressedSize / unit; - double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/total.compressedSize; + double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize; const char* const checkString = (total.usesCheck ? "XXH64" : ""); DISPLAYOUT("----------------------------------------------------------------- \n"); if (total.decompUnavailable) { diff --git a/programs/fileio.h b/programs/fileio.h index 05e6d06815f..007440bd295 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/platform.h b/programs/platform.h index 68be70bb333..b4c6ee90685 100644 --- a/programs/platform.h +++ b/programs/platform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/timefn.c b/programs/timefn.c index 95460d0d971..f04b8cc2645 100644 --- a/programs/timefn.c +++ b/programs/timefn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2019-2021, Yann Collet, Facebook, Inc. 
* All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/timefn.h b/programs/timefn.h index 5d2818e8a1b..9118004333c 100644 --- a/programs/timefn.h +++ b/programs/timefn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/util.c b/programs/util.c index 5386d005c26..3fd4cd17e66 100644 --- a/programs/util.c +++ b/programs/util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -159,6 +159,15 @@ int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions) return chmod(filename, permissions); } +int UTIL_umask(int mode) { +#if PLATFORM_POSIX_VERSION > 0 + return umask(mode); +#else + /* do nothing, fake return value */ + return mode; +#endif +} + int UTIL_setFileStat(const char *filename, const stat_t *statbuf) { int res = 0; @@ -670,7 +679,27 @@ const char* UTIL_getFileExtension(const char* infilename) static int pathnameHas2Dots(const char *pathname) { - return NULL != strstr(pathname, ".."); + /* We need to figure out whether any ".." present in the path is a whole + * path token, which is the case if it is bordered on both sides by either + * the beginning/end of the path or by a directory separator. 
+ */ + const char *needle = pathname; + while (1) { + needle = strstr(needle, ".."); + + if (needle == NULL) { + return 0; + } + + if ((needle == pathname || needle[-1] == PATH_SEP) + && (needle[2] == '\0' || needle[2] == PATH_SEP)) { + return 1; + } + + /* increment so we search for the next match */ + needle++; + }; + return 0; } static int isFileNameValidForMirroredOutput(const char *filename) diff --git a/programs/util.h b/programs/util.h index 25fa3f53aab..0e696f00313 100644 --- a/programs/util.h +++ b/programs/util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -22,7 +22,7 @@ extern "C" { #include "platform.h" /* PLATFORM_POSIX_VERSION, ZSTD_NANOSLEEP_SUPPORT, ZSTD_SETPRIORITY_SUPPORT */ #include /* size_t, ptrdiff_t */ #include /* stat, utime */ -#include /* stat, chmod */ +#include /* stat, chmod, umask */ #include "../lib/common/mem.h" /* U64 */ @@ -152,6 +152,11 @@ U64 UTIL_getFileSizeStat(const stat_t* statbuf); */ int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions); +/** + * Wraps umask(). Does nothing when the platform doesn't have that concept. + */ +int UTIL_umask(int mode); + /* * In the absence of a pre-existing stat result on the file in question, these * functions will do a stat() call internally and then use that result to diff --git a/programs/windres/verrsrc.h b/programs/windres/verrsrc.h index 98156480ffd..6afb48ac432 100644 --- a/programs/windres/verrsrc.h +++ b/programs/windres/verrsrc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 73670daf6dc..1e4a7f4fea2 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -201,7 +201,8 @@ the last one takes effect. * `-o FILE`: save result into `FILE` * `-f`, `--force`: - overwrite output without prompting, and (de)compress symbolic links + disable input and output checks. Allows overwriting existing files, input + from console, output to stdout, operating on links, etc. * `-c`, `--stdout`: force write to standard output, even if it is the console * `--[no-]sparse`: diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 9b6f9153346..f263e969fb8 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -42,6 +42,9 @@ #ifndef ZSTD_NODICT # include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */ #endif +#ifndef ZSTD_NOTRACE +# include "zstdcli_trace.h" +#endif #include "../lib/zstd.h" /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */ @@ -103,6 +106,24 @@ typedef enum { cover, fastCover, legacy } dictType; static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */ +/*-************************************ +* Check Version (when CLI linked to dynamic library) +**************************************/ + +/* Due to usage of experimental symbols and capabilities by the CLI, + * the CLI must be linked against a dynamic library of same version */ +static void checkLibVersion(void) +{ + if (strcmp(ZSTD_VERSION_STRING, ZSTD_versionString())) { + DISPLAYLEVEL(1, "Error : incorrect library version (expecting : %s ; actual : %s ) \n", + ZSTD_VERSION_STRING, 
ZSTD_versionString()); + DISPLAYLEVEL(1, "Please update library to version %s, or use stand-alone zstd binary \n", + ZSTD_VERSION_STRING); + exit(1); + } +} + + /*-************************************ * Command Line **************************************/ @@ -126,7 +147,8 @@ static void usage(FILE* f, const char* programName) #endif DISPLAY_F(f, " -D DICT: use DICT as Dictionary for compression or decompression \n"); DISPLAY_F(f, " -o file: result stored into `file` (only 1 output file) \n"); - DISPLAY_F(f, " -f : overwrite output without prompting, also (de)compress links \n"); + DISPLAY_F(f, " -f : disable input and output checks. Allows overwriting existing files,\n"); + DISPLAY_F(f, " input from console, output to stdout, operating on links, etc.\n"); DISPLAY_F(f, "--rm : remove source file(s) after successful de/compression \n"); DISPLAY_F(f, " -k : preserve source file(s) (default) \n"); DISPLAY_F(f, " -h/-H : display help/long help and exit \n"); @@ -167,6 +189,11 @@ static void usage_advanced(const char* programName) DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate)."); #endif #endif /* ZSTD_NOCOMPRESS */ + +#ifndef ZSTD_NOTRACE + DISPLAYOUT( "\n"); + DISPLAYOUT( "--trace FILE : log tracing information to FILE."); +#endif DISPLAYOUT( "\n"); DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n"); @@ -696,6 +723,7 @@ int main(int const argCount, const char* argv[]) { int argNb, followLinks = 0, + forceStdin = 0, forceStdout = 0, hasStdout = 0, ldmFlag = 0, @@ -753,6 +781,7 @@ int main(int const argCount, const char* argv[]) /* init */ + checkLibVersion(); (void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */ (void)memLimit; assert(argCount >= 1); @@ -807,7 +836,7 @@ int main(int const argCount, const char* argv[]) if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; } if (!strcmp(argument, "--decompress")) { operation=zom_decompress; 
continue; } if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; } - if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; continue; } + if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; continue; } if (!strcmp(argument, "--version")) { printVersion(); CLEAN_RETURN(0); } if (!strcmp(argument, "--help")) { usage_advanced(programName); CLEAN_RETURN(0); } if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; } @@ -897,6 +926,9 @@ int main(int const argCount, const char* argv[]) if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; } #ifdef UTIL_HAS_MIRRORFILELIST if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; } +#endif +#ifndef ZSTD_NOTRACE + if (longCommandWArg(&argument, "--trace")) { char const* traceFile; NEXT_FIELD(traceFile); TRACE_enable(traceFile); continue; } #endif if (longCommandWArg(&argument, "--patch-from")) { NEXT_FIELD(patchFromDictFileName); continue; } if (longCommandWArg(&argument, "--long")) { @@ -988,7 +1020,7 @@ int main(int const argCount, const char* argv[]) case 'D': argument++; NEXT_FIELD(dictFileName); break; /* Overwrite */ - case 'f': FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; argument++; break; + case 'f': FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; argument++; break; /* Verbose mode */ case 'v': g_displayLevel++; argument++; break; @@ -1243,7 +1275,9 @@ int main(int const argCount, const char* argv[]) outFileName = stdoutmark; /* when input is stdin, default output is stdout */ /* Check if input/output defined as console; trigger an error in this case */ - if (!strcmp(filenames->fileNames[0], stdinmark) && IS_CONSOLE(stdin) ) { + if (!forceStdin + && !strcmp(filenames->fileNames[0], stdinmark) + && IS_CONSOLE(stdin) ) { DISPLAYLEVEL(1, "stdin is a console, aborting\n"); CLEAN_RETURN(1); 
} @@ -1281,15 +1315,15 @@ int main(int const argCount, const char* argv[]) DISPLAY("error : can't use --patch-from=# on multiple files \n"); CLEAN_RETURN(1); } - - /* No status message in pipe mode (stdin - stdout) */ + + /* No status message in pipe mode (stdin - stdout) */ hasStdout = outFileName && !strcmp(outFileName,stdoutmark); if (hasStdout && (g_displayLevel==2)) g_displayLevel=1; /* IO Stream/File */ FIO_setHasStdoutOutput(fCtx, hasStdout); - FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize); + FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize); FIO_determineHasStdinInput(fCtx, filenames); FIO_setNotificationLevel(g_displayLevel); FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL); @@ -1374,6 +1408,9 @@ int main(int const argCount, const char* argv[]) if (main_pause) waitEnter(); UTIL_freeFileNamesTable(filenames); UTIL_freeFileNamesTable(file_of_names); +#ifndef ZSTD_NOTRACE + TRACE_finish(); +#endif return operationResult; } diff --git a/programs/zstdcli_trace.c b/programs/zstdcli_trace.c new file mode 100644 index 00000000000..cd220b92d9e --- /dev/null +++ b/programs/zstdcli_trace.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstdcli_trace.h" + +#include +#include + +#include "timefn.h" +#include "util.h" + +#define ZSTD_STATIC_LINKING_ONLY +#include "../lib/zstd.h" +/* We depend on the trace header to avoid duplicating the ZSTD_trace struct. + * But, we check the version so it is compatible with dynamic linking. 
+ */ +#include "../lib/common/zstd_trace.h" +/* We only use macros from threading.h so it is compatible with dynamic linking */ +#include "../lib/common/threading.h" + +#if ZSTD_TRACE + +static FILE* g_traceFile = NULL; +static int g_mutexInit = 0; +static ZSTD_pthread_mutex_t g_mutex; +static UTIL_time_t g_enableTime = UTIL_TIME_INITIALIZER; + +void TRACE_enable(char const* filename) +{ + int const writeHeader = !UTIL_isRegularFile(filename); + if (g_traceFile) + fclose(g_traceFile); + g_traceFile = fopen(filename, "a"); + if (g_traceFile && writeHeader) { + /* Fields: + * algorithm + * version + * method + * streaming + * level + * workers + * dictionary size + * uncompressed size + * compressed size + * duration nanos + * compression ratio + * speed MB/s + */ + fprintf(g_traceFile, "Algorithm, Version, Method, Mode, Level, Workers, Dictionary Size, Uncompressed Size, Compressed Size, Duration Nanos, Compression Ratio, Speed MB/s\n"); + } + g_enableTime = UTIL_getTime(); + if (!g_mutexInit) { + if (!ZSTD_pthread_mutex_init(&g_mutex, NULL)) { + g_mutexInit = 1; + } else { + TRACE_finish(); + } + } +} + +void TRACE_finish(void) +{ + if (g_traceFile) { + fclose(g_traceFile); + } + g_traceFile = NULL; + if (g_mutexInit) { + ZSTD_pthread_mutex_destroy(&g_mutex); + g_mutexInit = 0; + } +} + +static void TRACE_log(char const* method, PTime duration, ZSTD_Trace const* trace) +{ + int level = 0; + int workers = 0; + double const ratio = (double)trace->uncompressedSize / (double)trace->compressedSize; + double const speed = ((double)trace->uncompressedSize * 1000) / (double)duration; + if (trace->params) { + ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_compressionLevel, &level); + ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_nbWorkers, &workers); + } + assert(g_traceFile != NULL); + + ZSTD_pthread_mutex_lock(&g_mutex); + /* Fields: + * algorithm + * version + * method + * streaming + * level + * workers + * dictionary size + * uncompressed size + * compressed 
size + * duration nanos + * compression ratio + * speed MB/s + */ + fprintf(g_traceFile, + "zstd, %u, %s, %s, %d, %d, %llu, %llu, %llu, %llu, %.2f, %.2f\n", + trace->version, + method, + trace->streaming ? "streaming" : "single-pass", + level, + workers, + (unsigned long long)trace->dictionarySize, + (unsigned long long)trace->uncompressedSize, + (unsigned long long)trace->compressedSize, + (unsigned long long)duration, + ratio, + speed); + ZSTD_pthread_mutex_unlock(&g_mutex); +} + +/** + * These symbols override the weak symbols provided by the library. + */ + +ZSTD_TraceCtx ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx) +{ + (void)cctx; + if (g_traceFile == NULL) + return 0; + return (ZSTD_TraceCtx)UTIL_clockSpanNano(g_enableTime); +} + +void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace) +{ + PTime const beginNanos = (PTime)ctx; + PTime const endNanos = UTIL_clockSpanNano(g_enableTime); + PTime const durationNanos = endNanos > beginNanos ? endNanos - beginNanos : 0; + assert(g_traceFile != NULL); + assert(trace->version == ZSTD_VERSION_NUMBER); /* CLI version must match. */ + TRACE_log("compress", durationNanos, trace); +} + +ZSTD_TraceCtx ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx) +{ + (void)dctx; + if (g_traceFile == NULL) + return 0; + return (ZSTD_TraceCtx)UTIL_clockSpanNano(g_enableTime); +} + +void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace) +{ + PTime const beginNanos = (PTime)ctx; + PTime const endNanos = UTIL_clockSpanNano(g_enableTime); + PTime const durationNanos = endNanos > beginNanos ? endNanos - beginNanos : 0; + assert(g_traceFile != NULL); + assert(trace->version == ZSTD_VERSION_NUMBER); /* CLI version must match. 
*/ + TRACE_log("decompress", durationNanos, trace); +} + +#else /* ZSTD_TRACE */ + +void TRACE_enable(char const* filename) +{ + (void)filename; +} + +void TRACE_finish(void) {} + +#endif /* ZSTD_TRACE */ diff --git a/programs/zstdcli_trace.h b/programs/zstdcli_trace.h new file mode 100644 index 00000000000..6ed39080812 --- /dev/null +++ b/programs/zstdcli_trace.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDCLI_TRACE_H +#define ZSTDCLI_TRACE_H + +/** + * Enable tracing - log to filename. + */ +void TRACE_enable(char const* filename); + +/** + * Shut down the tracing library. + */ +void TRACE_finish(void); + +#endif /* ZSTDCLI_TRACE_H */ diff --git a/tests/DEPRECATED-test-zstd-speed.py b/tests/DEPRECATED-test-zstd-speed.py index b3f80745984..ab699cf5fd5 100755 --- a/tests/DEPRECATED-test-zstd-speed.py +++ b/tests/DEPRECATED-test-zstd-speed.py @@ -2,7 +2,7 @@ # THIS BENCHMARK IS BEING REPLACED BY automated-bencmarking.py # ################################################################ -# Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. +# Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/Makefile b/tests/Makefile index 42bc353c056..f060505708d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. +# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the @@ -38,8 +38,8 @@ CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-prototypes -Wundef \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls -Wmissing-prototypes -CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) -FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) +CFLAGS += $(DEBUGFLAGS) +CPPFLAGS += $(MOREFLAGS) ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c @@ -107,7 +107,6 @@ libzstd : %-dll : libzstd %-dll : LDFLAGS += -L$(ZSTDDIR) -lzstd -.PHONY: $(ZSTDDIR)/libzstd.a $(ZSTDDIR)/libzstd.a : $(MAKE) -C $(ZSTDDIR) libzstd.a @@ -146,7 +145,7 @@ fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR # note : broken : requires symbols unavailable from dynamic library fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/timefn.c fullbench.c # $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll - $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) + $(LINK.c) $^ $(LDLIBS) -o $@$(EXT) fuzzer : CPPFLAGS += $(MULTITHREAD_CPP) fuzzer : LDFLAGS += $(MULTITHREAD_LD) @@ -165,7 +164,7 @@ zbufftest zbufftest32 zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated zbufftest zbufftest32 zbufftest-dll : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings zbufftest32 : CFLAGS += -m32 zbufftest zbufftest32 : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c - $(CC) $(FLAGS) $^ -o $@$(EXT) + $(LINK.c) $^ -o $@$(EXT) zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) diff --git a/tests/automated_benchmarking.py b/tests/automated_benchmarking.py index 77eea29de27..3230821bf18 100644 --- a/tests/automated_benchmarking.py +++ b/tests/automated_benchmarking.py @@ -1,5 +1,5 @@ # 
################################################################ -# Copyright (c) 2020-2020, Facebook, Inc. +# Copyright (c) 2021-2021, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/bigdict.c b/tests/bigdict.c index aeda56cb538..4d08ca19822 100644 --- a/tests/bigdict.c +++ b/tests/bigdict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2017-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/checkTag.c b/tests/checkTag.c index 90af24ab14a..76664e057fa 100644 --- a/tests/checkTag.c +++ b/tests/checkTag.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2018-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/datagencli.c b/tests/datagencli.c index 713ca9963e0..2ca56316d14 100644 --- a/tests/datagencli.c +++ b/tests/datagencli.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index 50935d31ec7..e4691919cfc 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2017-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fullbench.c b/tests/fullbench.c index 37f0e242f6c..5dc42ee63e7 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. 
+ * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 36232a8cf5a..f3a561def2c 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2016-2020, Facebook, Inc. +# Copyright (c) 2016-2021, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/block_decompress.c b/tests/fuzz/block_decompress.c index 64d70f005e9..2f987811111 100644 --- a/tests/fuzz/block_decompress.c +++ b/tests/fuzz/block_decompress.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/block_round_trip.c b/tests/fuzz/block_round_trip.c index 097fc01b89b..c88850fbb16 100644 --- a/tests/fuzz/block_round_trip.c +++ b/tests/fuzz/block_round_trip.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/decompress_dstSize_tooSmall.c b/tests/fuzz/decompress_dstSize_tooSmall.c index e47b3d049a4..76806e30474 100644 --- a/tests/fuzz/decompress_dstSize_tooSmall.c +++ b/tests/fuzz/decompress_dstSize_tooSmall.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c index 9944baa158b..17035213cda 100644 --- a/tests/fuzz/dictionary_decompress.c +++ b/tests/fuzz/dictionary_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/dictionary_loader.c b/tests/fuzz/dictionary_loader.c index f1fdf4da9e6..53c252641b6 100644 --- a/tests/fuzz/dictionary_loader.c +++ b/tests/fuzz/dictionary_loader.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c index 7b7771e48e2..f08f0240fa9 100644 --- a/tests/fuzz/dictionary_round_trip.c +++ b/tests/fuzz/dictionary_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/dictionary_stream_round_trip.c b/tests/fuzz/dictionary_stream_round_trip.c index 67e8c69ef0a..95e84704cd8 100644 --- a/tests/fuzz/dictionary_stream_round_trip.c +++ b/tests/fuzz/dictionary_stream_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fse_read_ncount.c b/tests/fuzz/fse_read_ncount.c index e20a93829ab..fc809ef89ee 100644 --- a/tests/fuzz/fse_read_ncount.c +++ b/tests/fuzz/fse_read_ncount.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. 
+ * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h index 8ee96453674..c9ba39ce806 100644 --- a/tests/fuzz/fuzz.h +++ b/tests/fuzz/fuzz.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index ef94a53b454..05a778738aa 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # ################################################################ -# Copyright (c) 2016-2020, Facebook, Inc. +# Copyright (c) 2016-2021, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the @@ -180,14 +180,15 @@ def compiler_version(cc, cxx): cxx_version_bytes = subprocess.check_output([cxx, "--version"]) compiler = None version = None + print("{} --version:\n{}".format(cc, cc_version_bytes.decode('ascii'))) if b'clang' in cc_version_bytes: assert(b'clang' in cxx_version_bytes) compiler = 'clang' - elif b'gcc' in cc_version_bytes: + elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes: assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes) compiler = 'gcc' if compiler is not None: - version_regex = b'([0-9])+\.([0-9])+\.([0-9])+' + version_regex = b'([0-9]+)\.([0-9]+)\.([0-9]+)' version_match = re.search(version_regex, cc_version_bytes) version = tuple(int(version_match.group(i)) for i in range(1, 4)) return compiler, version @@ -195,9 +196,9 @@ def compiler_version(cc, cxx): def overflow_ubsan_flags(cc, cxx): compiler, version = compiler_version(cc, cxx) - if compiler == 'gcc': + if compiler == 'gcc' and version < (8, 0, 0): return ['-fno-sanitize=signed-integer-overflow'] - if compiler == 'clang' and version >= (5, 0, 
0): + if compiler == 'gcc' or (compiler == 'clang' and version >= (5, 0, 0)): return ['-fno-sanitize=pointer-overflow'] return [] diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c index f2d5a1b5158..738409c44d0 100644 --- a/tests/fuzz/fuzz_data_producer.c +++ b/tests/fuzz/fuzz_data_producer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz_data_producer.h b/tests/fuzz/fuzz_data_producer.h index 25cc937fcce..1cfcad9eaf8 100644 --- a/tests/fuzz/fuzz_data_producer.h +++ b/tests/fuzz/fuzz_data_producer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz_helpers.c b/tests/fuzz/fuzz_helpers.c index b80dc75716d..8d62ee9d8a8 100644 --- a/tests/fuzz/fuzz_helpers.c +++ b/tests/fuzz/fuzz_helpers.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -29,4 +29,4 @@ int FUZZ_memcmp(void const* lhs, void const* rhs, size_t size) return 0; } return memcmp(lhs, rhs, size); -} \ No newline at end of file +} diff --git a/tests/fuzz/fuzz_helpers.h b/tests/fuzz/fuzz_helpers.h index cde2c4ea7cf..7085e14cccc 100644 --- a/tests/fuzz/fuzz_helpers.h +++ b/tests/fuzz/fuzz_helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/raw_dictionary_round_trip.c b/tests/fuzz/raw_dictionary_round_trip.c index 08e5fd9edc1..c6e44db6136 100644 --- a/tests/fuzz/raw_dictionary_round_trip.c +++ b/tests/fuzz/raw_dictionary_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c index 8180ca822fc..326cfdc2d8e 100644 --- a/tests/fuzz/regression_driver.c +++ b/tests/fuzz/regression_driver.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index e838687a31d..d9003d4916a 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/simple_compress.c b/tests/fuzz/simple_compress.c index 620177fb0ef..f02223faa8b 100644 --- a/tests/fuzz/simple_compress.c +++ b/tests/fuzz/simple_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c index c3903ce8bfa..4ecc22b33d5 100644 --- a/tests/fuzz/simple_decompress.c +++ b/tests/fuzz/simple_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. 
+ * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 6e58fb1c3fe..f0e10960164 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c index 5d2bb2aaf41..6117acaa99d 100644 --- a/tests/fuzz/stream_decompress.c +++ b/tests/fuzz/stream_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c index 286d3871b7a..a09b04a9d61 100644 --- a/tests/fuzz/stream_round_trip.c +++ b/tests/fuzz/stream_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/zstd_frame_info.c b/tests/fuzz/zstd_frame_info.c index 876a74e9aeb..8b3608ab945 100644 --- a/tests/fuzz/zstd_frame_info.c +++ b/tests/fuzz/zstd_frame_info.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 5680bd628e1..8ef77869261 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. 
+ * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h index 6a4e340d358..46c6d09217a 100644 --- a/tests/fuzz/zstd_helpers.h +++ b/tests/fuzz/zstd_helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 2e5d70ef2fe..5f707e027d5 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -675,6 +675,41 @@ static int basicUnitTests(U32 const seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + { + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, 100, 1); + ZSTD_parameters const params = ZSTD_getParams(1, 0, 0); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) ); + + DISPLAYLEVEL(3, "test%3i : ZSTD_compressCCtx() doesn't use advanced parameters", testNb++); + CHECK_Z(ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 1)); + if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_compress_usingDict() doesn't use advanced parameters: ", testNb++); + CHECK_Z(ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize, NULL, 0, NULL, 0, 1)); + if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_compress_usingCDict() doesn't use advanced parameters: ", testNb++); + 
CHECK_Z(ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, NULL, 0, cdict)); + if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_compress_advanced() doesn't use advanced parameters: ", testNb++); + CHECK_Z(ZSTD_compress_advanced(cctx, compressedBuffer, compressedBufferSize, NULL, 0, NULL, 0, params)); + if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_compress_usingCDict_advanced() doesn't use advanced parameters: ", testNb++); + CHECK_Z(ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize, NULL, 0, cdict, params.fParams)); + if (MEM_readLE32(compressedBuffer) != ZSTD_MAGICNUMBER) goto _output_error; + DISPLAYLEVEL(3, "OK \n"); + + ZSTD_freeCDict(cdict); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "test%3i : ldm fill dict out-of-bounds check", testNb++); { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); @@ -1570,6 +1605,11 @@ static int basicUnitTests(U32 const seed, double compressibility) int const segs = 4; /* only use the first half so we don't push against size limit of compressedBuffer */ size_t const segSize = (CNBuffSize / 2) / segs; + + const U32 skipLen = 129 KB; + char* const skipBuff = (char*)malloc(skipLen); + assert(skipBuff != NULL); + memset(skipBuff, 0, skipLen); for (i = 0; i < segs; i++) { CHECK_NEWV(r, ZSTD_compress( (BYTE*)compressedBuffer + off, CNBuffSize - off, @@ -1578,13 +1618,15 @@ static int basicUnitTests(U32 const seed, double compressibility) off += r; if (i == segs/2) { /* insert skippable frame */ - const U32 skipLen = 129 KB; - MEM_writeLE32((BYTE*)compressedBuffer + off, ZSTD_MAGIC_SKIPPABLE_START); - MEM_writeLE32((BYTE*)compressedBuffer + off + 4, skipLen); - off += skipLen + ZSTD_SKIPPABLEHEADERSIZE; + size_t const skippableSize = + ZSTD_writeSkippableFrame((BYTE*)compressedBuffer + off, compressedBufferSize, 
+ skipBuff, skipLen, seed % 15); + CHECK_Z(skippableSize); + off += skippableSize; } } cSize = off; + free(skipBuff); } DISPLAYLEVEL(3, "OK \n"); @@ -1763,6 +1805,19 @@ static int basicUnitTests(U32 const seed, double compressibility) size_t dictSize; U32 dictID; size_t dictHeaderSize; + size_t dictBufferFixedSize = 144; + unsigned char const dictBufferFixed[144] = {0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f, + 0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01, + 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18, + 0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c, + 0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61, + 0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65, + 0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69}; if (dictBuffer==NULL || samplesSizes==NULL) { free(dictBuffer); @@ -1858,19 +1913,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize); DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++); - { unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f, - 0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01, - 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18, - 0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c, - 0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04, - 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61, - 0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65, - 0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 }; - dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144); + { dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, dictBufferFixedSize); if (dictHeaderSize != 115) goto _output_error; } DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize); @@ -2324,6 +2367,74 @@ static int basicUnitTests(U32 const seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with multiple ddicts : ", testNb++); + { + const size_t numDicts = 128; + const size_t numFrames = 4; + size_t i; + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + ZSTD_DDict** ddictTable = (ZSTD_DDict**)malloc(sizeof(ZSTD_DDict*)*numDicts); + ZSTD_CDict** cdictTable = (ZSTD_CDict**)malloc(sizeof(ZSTD_CDict*)*numDicts); + U32 dictIDSeed = seed; + /* Create new compressed buffer that will hold frames with differing dictIDs */ + char* dictBufferMulti = (char*)malloc(sizeof(char) * dictBufferFixedSize); /* Modifiable copy of fixed full dict buffer */ + + ZSTD_memcpy(dictBufferMulti, dictBufferFixed, dictBufferFixedSize); + /* Create a bunch of DDicts with random dict IDs */ + for (i = 0; i < numDicts; ++i) { + U32 currDictID = FUZ_rand(&dictIDSeed); + MEM_writeLE32(dictBufferMulti+ZSTD_FRAMEIDSIZE, currDictID); + ddictTable[i] = ZSTD_createDDict(dictBufferMulti, dictBufferFixedSize); + cdictTable[i] = ZSTD_createCDict(dictBufferMulti, dictBufferFixedSize, 3); + if (!ddictTable[i] || !cdictTable[i] || ZSTD_getDictID_fromCDict(cdictTable[i]) != ZSTD_getDictID_fromDDict(ddictTable[i])) { + 
goto _output_error; + } + } + /* Compress a few frames using random CDicts */ + { + size_t off = 0; + /* only use the first half so we don't push against size limit of compressedBuffer */ + size_t const segSize = (CNBuffSize / 2) / numFrames; + for (i = 0; i < numFrames; i++) { + size_t dictIdx = FUZ_rand(&dictIDSeed) % numDicts; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + { CHECK_NEWV(r, ZSTD_compress_usingCDict(cctx, + (BYTE*)compressedBuffer + off, CNBuffSize - off, + (BYTE*)CNBuffer + segSize * (size_t)i, segSize, + cdictTable[dictIdx])); + off += r; + } + } + cSize = off; + } + + /* We should succeed to decompression even though different dicts were used on different frames */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts, ZSTD_rmd_refMultipleDDicts); + /* Reference every single ddict we made */ + for (i = 0; i < numDicts; ++i) { + CHECK_Z( ZSTD_DCtx_refDDict(dctx, ddictTable[i])); + } + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* Streaming decompression should also work */ + { + ZSTD_inBuffer in = {compressedBuffer, cSize, 0}; + ZSTD_outBuffer out = {decodedBuffer, CNBuffSize, 0}; + while (in.pos < in.size) { + CHECK_Z(ZSTD_decompressStream(dctx, &out, &in)); + } + } + ZSTD_freeDCtx(dctx); + for (i = 0; i < numDicts; ++i) { + ZSTD_freeCDict(cdictTable[i]); + ZSTD_freeDDict(ddictTable[i]); + } + free(dictBufferMulti); + free(ddictTable); + free(cdictTable); + } + DISPLAYLEVEL(3, "OK \n"); + ZSTD_freeCCtx(cctx); free(dictBuffer); free(samplesSizes); @@ -2739,7 +2850,7 @@ static int basicUnitTests(U32 const seed, double compressibility) free(seqs); } DISPLAYLEVEL(3, "OK \n"); - + DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences followed by ZSTD_compressSequences : ", testNb++); { size_t srcSize = 500 KB; @@ -3044,6 +3155,32 @@ static int basicUnitTests(U32 const seed, double compressibility) free(dict); } DISPLAYLEVEL(3, "OK 
\n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_getCParams() + dictionary ", testNb++); + { + ZSTD_compressionParameters const medium = ZSTD_getCParams(1, 16*1024-1, 0); + ZSTD_compressionParameters const large = ZSTD_getCParams(1, 128*1024-1, 0); + ZSTD_compressionParameters const smallDict = ZSTD_getCParams(1, 0, 400); + ZSTD_compressionParameters const mediumDict = ZSTD_getCParams(1, 0, 10000); + ZSTD_compressionParameters const largeDict = ZSTD_getCParams(1, 0, 100000); + + assert(!memcmp(&smallDict, &mediumDict, sizeof(smallDict))); + assert(!memcmp(&medium, &mediumDict, sizeof(medium))); + assert(!memcmp(&large, &largeDict, sizeof(large))); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_adjustCParams() + dictionary ", testNb++); + { + ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, 0, 0); + ZSTD_compressionParameters const smallDict = ZSTD_adjustCParams(cParams, 0, 400); + ZSTD_compressionParameters const smallSrcAndDict = ZSTD_adjustCParams(cParams, 500, 400); + + assert(smallSrcAndDict.windowLog == 10); + assert(!memcmp(&cParams, &smallDict, sizeof(cParams))); + } + DISPLAYLEVEL(3, "OK \n"); + #endif _end: diff --git a/tests/gzip/Makefile b/tests/gzip/Makefile index 73f62f0ebf0..ac953bd8174 100644 --- a/tests/gzip/Makefile +++ b/tests/gzip/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2017-2020, Facebook, Inc. +# Copyright (c) 2017-2021, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/invalidDictionaries.c b/tests/invalidDictionaries.c index 23e93fd5409..48ba6aebdd1 100644 --- a/tests/invalidDictionaries.c +++ b/tests/invalidDictionaries.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/legacy.c b/tests/legacy.c index 3d3ec43581f..d6407a75f23 100644 --- a/tests/legacy.c +++ b/tests/legacy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/longmatch.c b/tests/longmatch.c index 93e78dd1fc5..075b152aeee 100644 --- a/tests/longmatch.c +++ b/tests/longmatch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2017-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 439aebed8c4..89621824792 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/playTests.sh b/tests/playTests.sh index 4d6abbcc066..da0e67a35b8 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -114,13 +114,14 @@ esac case "$UNAME" in Darwin) MD5SUM="md5 -r" ;; FreeBSD) MD5SUM="gmd5sum" ;; + NetBSD) MD5SUM="md5 -n" ;; OpenBSD) MD5SUM="md5" ;; *) MD5SUM="md5sum" ;; esac MTIME="stat -c %Y" case "$UNAME" in - Darwin | FreeBSD | OpenBSD) MTIME="stat -f %m" ;; + Darwin | FreeBSD | OpenBSD | NetBSD) MTIME="stat -f %m" ;; esac DIFF="diff" @@ -485,23 +486,29 @@ rm -rf tmp* if [ "$isWindows" = false ] ; then println "\n===> compress multiple files into an output directory and mirror input folder, --output-dir-mirror" println "test --output-dir-mirror" > tmp1 - mkdir -p tmpInputTestDir/we/must/go/deeper - println cool > tmpInputTestDir/we/must/go/deeper/tmp2 + mkdir -p tmpInputTestDir/we/.../..must/go/deeper.. + println cool > tmpInputTestDir/we/.../..must/go/deeper../tmp2 zstd tmp1 -r tmpInputTestDir --output-dir-mirror tmpOutDir test -f tmpOutDir/tmp1.zst - test -f tmpOutDir/tmpInputTestDir/we/must/go/deeper/tmp2.zst + test -f tmpOutDir/tmpInputTestDir/we/.../..must/go/deeper../tmp2.zst println "test: compress input dir will be ignored if it has '..'" - zstd -r tmpInputTestDir/we/must/../must --output-dir-mirror non-exist && die "input cannot contain '..'" + zstd -r tmpInputTestDir/we/.../..must/../..mustgo/deeper.. --output-dir-mirror non-exist && die "input cannot contain '..'" + zstd -r tmpInputTestDir/we/.../..must/deeper../.. --output-dir-mirror non-exist && die "input cannot contain '..'" + zstd -r ../tests/tmpInputTestDir/we/.../..must/deeper.. --output-dir-mirror non-exist && die "input cannot contain '..'" test ! -d non-exist + println "test: compress input dir should succeed with benign uses of '..'" + zstd -r tmpInputTestDir/we/.../..must/go/deeper.. 
--output-dir-mirror tmpout + test -d tmpout + println "test : decompress multiple files into an output directory, --output-dir-mirror" zstd tmpOutDir -r -d --output-dir-mirror tmpOutDirDecomp test -f tmpOutDirDecomp/tmpOutDir/tmp1 - test -f tmpOutDirDecomp/tmpOutDir/tmpInputTestDir/we/must/go/deeper/tmp2 + test -f tmpOutDirDecomp/tmpOutDir/tmpInputTestDir/we/.../..must/go/deeper../tmp2 println "test: decompress input dir will be ignored if it has '..'" - zstd -r tmpOutDir/tmpInputTestDir/we/must/../must --output-dir-mirror non-exist && die "input cannot contain '..'" + zstd -r tmpOutDir/tmpInputTestDir/we/.../..must/../..must --output-dir-mirror non-exist && die "input cannot contain '..'" test ! -d non-exist rm -rf tmp* @@ -1322,6 +1329,21 @@ zstd -f --no-check tmp1 zstd -l tmp1.zst zstd -lv tmp1.zst +println "\n===> zstd trace tests " +zstd -f --trace tmp.trace tmp1 +zstd -f --trace tmp.trace tmp1 tmp2 tmp3 +zstd -f --trace tmp.trace tmp1 tmp2 tmp3 -o /dev/null +zstd -f --trace tmp.trace tmp1 tmp2 tmp3 --single-thread +zstd -f --trace tmp.trace -D tmp1 tmp2 tmp3 -o /dev/null +zstd -f --trace tmp.trace -D tmp1 tmp2 tmp3 -o /dev/null --single-thread +zstd --trace tmp.trace -t tmp1.zst +zstd --trace tmp.trace -t tmp1.zst tmp2.zst +zstd -f --trace tmp.trace -d tmp1.zst +zstd -f --trace tmp.trace -d tmp1.zst tmp2.zst tmp3.zst +zstd -D tmp1 tmp2 -c | zstd --trace tmp.trace -t -D tmp1 +zstd -b1e10i0 --trace tmp.trace tmp1 +zstd -b1e10i0 --trace tmp.trace tmp1 tmp2 tmp3 + rm tmp* diff --git a/tests/poolTests.c b/tests/poolTests.c index e1576ba85bf..c7fcfa86067 100644 --- a/tests/poolTests.c +++ b/tests/poolTests.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/rateLimiter.py b/tests/rateLimiter.py index 1068c44248f..fbec8c2f9c5 100755 --- a/tests/rateLimiter.py +++ b/tests/rateLimiter.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # ################################################################ -# Copyright (c) 2018-2020, Facebook, Inc. +# Copyright (c) 2018-2021, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/Makefile b/tests/regression/Makefile index 87c1c2b966f..6874d81baaf 100644 --- a/tests/regression/Makefile +++ b/tests/regression/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2015-2020, Facebook, Inc. +# Copyright (c) 2015-2021, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/config.c b/tests/regression/config.c index ed6b692352c..ee0a71e4d48 100644 --- a/tests/regression/config.c +++ b/tests/regression/config.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -59,6 +59,14 @@ static config_t no_pledged_src_size = { .no_pledged_src_size = 1, }; +static config_t no_pledged_src_size_with_dict = { + .name = "no source size with dict", + .cli_args = "", + .param_values = PARAM_VALUES(level_0_param_values), + .no_pledged_src_size = 1, + .use_dictionary = 1, +}; + static param_value_t const ldm_param_values[] = { {.param = ZSTD_c_enableLongDistanceMatching, .value = 1}, }; @@ -192,6 +200,7 @@ static config_t const* g_configs[] = { #undef FAST_LEVEL &no_pledged_src_size, + &no_pledged_src_size_with_dict, &ldm, &mt, &mt_ldm, diff --git a/tests/regression/config.h b/tests/regression/config.h index aa563b9e903..9c4562c8ff9 100644 --- a/tests/regression/config.h +++ b/tests/regression/config.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/data.c b/tests/regression/data.c index b75ac11928d..b06c6914f07 100644 --- a/tests/regression/data.c +++ b/tests/regression/data.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -67,10 +67,27 @@ data_t github = { }, }; +data_t github_tar = { + .name = "github.tar", + .type = data_type_file, + .data = + { + .url = REGRESSION_RELEASE("github.tar.zst"), + .xxhash64 = 0xa9b1b44b020df292LL, + }, + .dict = + { + .url = REGRESSION_RELEASE("github.dict.zst"), + .xxhash64 = 0x1eddc6f737d3cb53LL, + + }, +}; + static data_t* g_data[] = { &silesia, &silesia_tar, &github, + &github_tar, NULL, }; diff --git a/tests/regression/data.h b/tests/regression/data.h index 90ed22f19dc..9f2fc897867 100644 --- a/tests/regression/data.h +++ b/tests/regression/data.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/levels.h b/tests/regression/levels.h index 5e7d40a7d0e..aedc1ce3022 100644 --- a/tests/regression/levels.h +++ b/tests/regression/levels.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/method.c b/tests/regression/method.c index 3c949a278d3..c4fe94a2af4 100644 --- a/tests/regression/method.c +++ b/tests/regression/method.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/method.h b/tests/regression/method.h index 6884e54186e..d89e64d6b2c 100644 --- a/tests/regression/method.h +++ b/tests/regression/method.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/result.c b/tests/regression/result.c index 2911722cd5a..5fc37c0aa47 100644 --- a/tests/regression/result.c +++ b/tests/regression/result.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/result.h b/tests/regression/result.h index 0085c2adf13..7b456165b4e 100644 --- a/tests/regression/result.h +++ b/tests/regression/result.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/results.csv b/tests/regression/results.csv index 979b1d25095..0fb1d160a6f 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -16,6 +16,23 @@ silesia.tar, level 19, compress silesia.tar, uncompressed literals, compress simple, 4861425 silesia.tar, uncompressed literals optimal, compress simple, 4281605 silesia.tar, huffman literals, compress simple, 6186042 +github.tar, level -5, compress simple, 46856 +github.tar, level -3, compress simple, 43754 +github.tar, level -1, compress simple, 42490 +github.tar, level 0, compress simple, 38441 +github.tar, level 1, compress simple, 39265 +github.tar, level 3, compress simple, 38441 +github.tar, level 4, compress simple, 38467 +github.tar, level 5, compress simple, 39788 +github.tar, level 6, compress simple, 39603 +github.tar, level 7, compress simple, 39206 +github.tar, level 9, compress simple, 36717 +github.tar, level 13, compress simple, 35621 +github.tar, level 16, compress simple, 40255 +github.tar, level 19, compress simple, 32837 +github.tar, uncompressed literals, compress simple, 38441 +github.tar, uncompressed literals optimal, 
compress simple, 32837 +github.tar, huffman literals, compress simple, 42490 silesia, level -5, compress cctx, 6737607 silesia, level -3, compress cctx, 6444677 silesia, level -1, compress cctx, 6178460 @@ -94,9 +111,9 @@ silesia, level 9, zstdcli, silesia, level 13, zstdcli, 4482183 silesia, level 16, zstdcli, 4377513 silesia, level 19, zstdcli, 4293378 -silesia, long distance mode, zstdcli, 4839756 +silesia, long distance mode, zstdcli, 4840792 silesia, multithreaded, zstdcli, 4849600 -silesia, multithreaded long distance mode, zstdcli, 4839756 +silesia, multithreaded long distance mode, zstdcli, 4840792 silesia, small window log, zstdcli, 7111012 silesia, small hash log, zstdcli, 6555069 silesia, small chain log, zstdcli, 4931196 @@ -120,9 +137,9 @@ silesia.tar, level 13, zstdcli, silesia.tar, level 16, zstdcli, 4381336 silesia.tar, level 19, zstdcli, 4281609 silesia.tar, no source size, zstdcli, 4861508 -silesia.tar, long distance mode, zstdcli, 4853190 +silesia.tar, long distance mode, zstdcli, 4853153 silesia.tar, multithreaded, zstdcli, 4861512 -silesia.tar, multithreaded long distance mode, zstdcli, 4853190 +silesia.tar, multithreaded long distance mode, zstdcli, 4853153 silesia.tar, small window log, zstdcli, 7101576 silesia.tar, small hash log, zstdcli, 6587959 silesia.tar, small chain log, zstdcli, 4943310 @@ -170,6 +187,47 @@ github, uncompressed literals, zstdcli, github, uncompressed literals optimal, zstdcli, 159227 github, huffman literals, zstdcli, 144465 github, multithreaded with advanced params, zstdcli, 167915 +github.tar, level -5, zstdcli, 46751 +github.tar, level -5 with dict, zstdcli, 43975 +github.tar, level -3, zstdcli, 43541 +github.tar, level -3 with dict, zstdcli, 40809 +github.tar, level -1, zstdcli, 42469 +github.tar, level -1 with dict, zstdcli, 41126 +github.tar, level 0, zstdcli, 38445 +github.tar, level 0 with dict, zstdcli, 37999 +github.tar, level 1, zstdcli, 39346 +github.tar, level 1 with dict, zstdcli, 38313 +github.tar, 
level 3, zstdcli, 38445 +github.tar, level 3 with dict, zstdcli, 37999 +github.tar, level 4, zstdcli, 38471 +github.tar, level 4 with dict, zstdcli, 37952 +github.tar, level 5, zstdcli, 39792 +github.tar, level 5 with dict, zstdcli, 39231 +github.tar, level 6, zstdcli, 39607 +github.tar, level 6 with dict, zstdcli, 38669 +github.tar, level 7, zstdcli, 39210 +github.tar, level 7 with dict, zstdcli, 37958 +github.tar, level 9, zstdcli, 36721 +github.tar, level 9 with dict, zstdcli, 36886 +github.tar, level 13, zstdcli, 35625 +github.tar, level 13 with dict, zstdcli, 38730 +github.tar, level 16, zstdcli, 40259 +github.tar, level 16 with dict, zstdcli, 33643 +github.tar, level 19, zstdcli, 32841 +github.tar, level 19 with dict, zstdcli, 32899 +github.tar, no source size, zstdcli, 38442 +github.tar, no source size with dict, zstdcli, 38004 +github.tar, long distance mode, zstdcli, 39726 +github.tar, multithreaded, zstdcli, 38445 +github.tar, multithreaded long distance mode, zstdcli, 39726 +github.tar, small window log, zstdcli, 199432 +github.tar, small hash log, zstdcli, 129874 +github.tar, small chain log, zstdcli, 41673 +github.tar, explicit params, zstdcli, 41199 +github.tar, uncompressed literals, zstdcli, 41126 +github.tar, uncompressed literals optimal, zstdcli, 35392 +github.tar, huffman literals, zstdcli, 38804 +github.tar, multithreaded with advanced params, zstdcli, 41126 silesia, level -5, advanced one pass, 6737607 silesia, level -3, advanced one pass, 6444677 silesia, level -1, advanced one pass, 6178460 @@ -185,9 +243,9 @@ silesia, level 13, advanced silesia, level 16, advanced one pass, 4377465 silesia, level 19, advanced one pass, 4293330 silesia, no source size, advanced one pass, 4849552 -silesia, long distance mode, advanced one pass, 4839708 +silesia, long distance mode, advanced one pass, 4840744 silesia, multithreaded, advanced one pass, 4849552 -silesia, multithreaded long distance mode, advanced one pass, 4839708 +silesia, multithreaded long 
distance mode, advanced one pass, 4840744 silesia, small window log, advanced one pass, 7095919 silesia, small hash log, advanced one pass, 6555021 silesia, small chain log, advanced one pass, 4931148 @@ -211,9 +269,9 @@ silesia.tar, level 13, advanced silesia.tar, level 16, advanced one pass, 4381332 silesia.tar, level 19, advanced one pass, 4281605 silesia.tar, no source size, advanced one pass, 4861425 -silesia.tar, long distance mode, advanced one pass, 4848098 +silesia.tar, long distance mode, advanced one pass, 4847735 silesia.tar, multithreaded, advanced one pass, 4861508 -silesia.tar, multithreaded long distance mode, advanced one pass, 4853186 +silesia.tar, multithreaded long distance mode, advanced one pass, 4853149 silesia.tar, small window log, advanced one pass, 7101530 silesia.tar, small hash log, advanced one pass, 6587951 silesia.tar, small chain log, advanced one pass, 4943307 @@ -251,6 +309,7 @@ github, level 16 with dict, advanced github, level 19, advanced one pass, 134064 github, level 19 with dict, advanced one pass, 37576 github, no source size, advanced one pass, 136335 +github, no source size with dict, advanced one pass, 41148 github, long distance mode, advanced one pass, 136335 github, multithreaded, advanced one pass, 136335 github, multithreaded long distance mode, advanced one pass, 136335 @@ -262,6 +321,47 @@ github, uncompressed literals, advanced github, uncompressed literals optimal, advanced one pass, 157227 github, huffman literals, advanced one pass, 142465 github, multithreaded with advanced params, advanced one pass, 165915 +github.tar, level -5, advanced one pass, 46856 +github.tar, level -5 with dict, advanced one pass, 43971 +github.tar, level -3, advanced one pass, 43754 +github.tar, level -3 with dict, advanced one pass, 40805 +github.tar, level -1, advanced one pass, 42490 +github.tar, level -1 with dict, advanced one pass, 41122 +github.tar, level 0, advanced one pass, 38441 +github.tar, level 0 with dict, advanced one 
pass, 37995 +github.tar, level 1, advanced one pass, 39265 +github.tar, level 1 with dict, advanced one pass, 38309 +github.tar, level 3, advanced one pass, 38441 +github.tar, level 3 with dict, advanced one pass, 37995 +github.tar, level 4, advanced one pass, 38467 +github.tar, level 4 with dict, advanced one pass, 37948 +github.tar, level 5, advanced one pass, 39788 +github.tar, level 5 with dict, advanced one pass, 39715 +github.tar, level 6, advanced one pass, 39603 +github.tar, level 6 with dict, advanced one pass, 38800 +github.tar, level 7, advanced one pass, 39206 +github.tar, level 7 with dict, advanced one pass, 38071 +github.tar, level 9, advanced one pass, 36717 +github.tar, level 9 with dict, advanced one pass, 36898 +github.tar, level 13, advanced one pass, 35621 +github.tar, level 13 with dict, advanced one pass, 38726 +github.tar, level 16, advanced one pass, 40255 +github.tar, level 16 with dict, advanced one pass, 33639 +github.tar, level 19, advanced one pass, 32837 +github.tar, level 19 with dict, advanced one pass, 32895 +github.tar, no source size, advanced one pass, 38441 +github.tar, no source size with dict, advanced one pass, 37995 +github.tar, long distance mode, advanced one pass, 39722 +github.tar, multithreaded, advanced one pass, 38441 +github.tar, multithreaded long distance mode, advanced one pass, 39722 +github.tar, small window log, advanced one pass, 198540 +github.tar, small hash log, advanced one pass, 129870 +github.tar, small chain log, advanced one pass, 41669 +github.tar, explicit params, advanced one pass, 41199 +github.tar, uncompressed literals, advanced one pass, 41122 +github.tar, uncompressed literals optimal, advanced one pass, 35388 +github.tar, huffman literals, advanced one pass, 38777 +github.tar, multithreaded with advanced params, advanced one pass, 41122 silesia, level -5, advanced one pass small out, 6737607 silesia, level -3, advanced one pass small out, 6444677 silesia, level -1, advanced one pass small 
out, 6178460 @@ -277,9 +377,9 @@ silesia, level 13, advanced silesia, level 16, advanced one pass small out, 4377465 silesia, level 19, advanced one pass small out, 4293330 silesia, no source size, advanced one pass small out, 4849552 -silesia, long distance mode, advanced one pass small out, 4839708 +silesia, long distance mode, advanced one pass small out, 4840744 silesia, multithreaded, advanced one pass small out, 4849552 -silesia, multithreaded long distance mode, advanced one pass small out, 4839708 +silesia, multithreaded long distance mode, advanced one pass small out, 4840744 silesia, small window log, advanced one pass small out, 7095919 silesia, small hash log, advanced one pass small out, 6555021 silesia, small chain log, advanced one pass small out, 4931148 @@ -303,9 +403,9 @@ silesia.tar, level 13, advanced silesia.tar, level 16, advanced one pass small out, 4381332 silesia.tar, level 19, advanced one pass small out, 4281605 silesia.tar, no source size, advanced one pass small out, 4861425 -silesia.tar, long distance mode, advanced one pass small out, 4848098 +silesia.tar, long distance mode, advanced one pass small out, 4847735 silesia.tar, multithreaded, advanced one pass small out, 4861508 -silesia.tar, multithreaded long distance mode, advanced one pass small out, 4853186 +silesia.tar, multithreaded long distance mode, advanced one pass small out, 4853149 silesia.tar, small window log, advanced one pass small out, 7101530 silesia.tar, small hash log, advanced one pass small out, 6587951 silesia.tar, small chain log, advanced one pass small out, 4943307 @@ -343,6 +443,7 @@ github, level 16 with dict, advanced github, level 19, advanced one pass small out, 134064 github, level 19 with dict, advanced one pass small out, 37576 github, no source size, advanced one pass small out, 136335 +github, no source size with dict, advanced one pass small out, 41148 github, long distance mode, advanced one pass small out, 136335 github, multithreaded, advanced 
one pass small out, 136335 github, multithreaded long distance mode, advanced one pass small out, 136335 @@ -354,6 +455,47 @@ github, uncompressed literals, advanced github, uncompressed literals optimal, advanced one pass small out, 157227 github, huffman literals, advanced one pass small out, 142465 github, multithreaded with advanced params, advanced one pass small out, 165915 +github.tar, level -5, advanced one pass small out, 46856 +github.tar, level -5 with dict, advanced one pass small out, 43971 +github.tar, level -3, advanced one pass small out, 43754 +github.tar, level -3 with dict, advanced one pass small out, 40805 +github.tar, level -1, advanced one pass small out, 42490 +github.tar, level -1 with dict, advanced one pass small out, 41122 +github.tar, level 0, advanced one pass small out, 38441 +github.tar, level 0 with dict, advanced one pass small out, 37995 +github.tar, level 1, advanced one pass small out, 39265 +github.tar, level 1 with dict, advanced one pass small out, 38309 +github.tar, level 3, advanced one pass small out, 38441 +github.tar, level 3 with dict, advanced one pass small out, 37995 +github.tar, level 4, advanced one pass small out, 38467 +github.tar, level 4 with dict, advanced one pass small out, 37948 +github.tar, level 5, advanced one pass small out, 39788 +github.tar, level 5 with dict, advanced one pass small out, 39715 +github.tar, level 6, advanced one pass small out, 39603 +github.tar, level 6 with dict, advanced one pass small out, 38800 +github.tar, level 7, advanced one pass small out, 39206 +github.tar, level 7 with dict, advanced one pass small out, 38071 +github.tar, level 9, advanced one pass small out, 36717 +github.tar, level 9 with dict, advanced one pass small out, 36898 +github.tar, level 13, advanced one pass small out, 35621 +github.tar, level 13 with dict, advanced one pass small out, 38726 +github.tar, level 16, advanced one pass small out, 40255 +github.tar, level 16 with dict, advanced one pass small out, 
33639 +github.tar, level 19, advanced one pass small out, 32837 +github.tar, level 19 with dict, advanced one pass small out, 32895 +github.tar, no source size, advanced one pass small out, 38441 +github.tar, no source size with dict, advanced one pass small out, 37995 +github.tar, long distance mode, advanced one pass small out, 39722 +github.tar, multithreaded, advanced one pass small out, 38441 +github.tar, multithreaded long distance mode, advanced one pass small out, 39722 +github.tar, small window log, advanced one pass small out, 198540 +github.tar, small hash log, advanced one pass small out, 129870 +github.tar, small chain log, advanced one pass small out, 41669 +github.tar, explicit params, advanced one pass small out, 41199 +github.tar, uncompressed literals, advanced one pass small out, 41122 +github.tar, uncompressed literals optimal, advanced one pass small out, 35388 +github.tar, huffman literals, advanced one pass small out, 38777 +github.tar, multithreaded with advanced params, advanced one pass small out, 41122 silesia, level -5, advanced streaming, 6882505 silesia, level -3, advanced streaming, 6568376 silesia, level -1, advanced streaming, 6183403 @@ -369,9 +511,9 @@ silesia, level 13, advanced silesia, level 16, advanced streaming, 4377465 silesia, level 19, advanced streaming, 4293330 silesia, no source size, advanced streaming, 4849516 -silesia, long distance mode, advanced streaming, 4839708 +silesia, long distance mode, advanced streaming, 4840744 silesia, multithreaded, advanced streaming, 4849552 -silesia, multithreaded long distance mode, advanced streaming, 4839708 +silesia, multithreaded long distance mode, advanced streaming, 4840744 silesia, small window log, advanced streaming, 7112062 silesia, small hash log, advanced streaming, 6555021 silesia, small chain log, advanced streaming, 4931148 @@ -395,9 +537,9 @@ silesia.tar, level 13, advanced silesia.tar, level 16, advanced streaming, 4381350 silesia.tar, level 19, advanced 
streaming, 4281562 silesia.tar, no source size, advanced streaming, 4861423 -silesia.tar, long distance mode, advanced streaming, 4848098 +silesia.tar, long distance mode, advanced streaming, 4847735 silesia.tar, multithreaded, advanced streaming, 4861508 -silesia.tar, multithreaded long distance mode, advanced streaming, 4853186 +silesia.tar, multithreaded long distance mode, advanced streaming, 4853149 silesia.tar, small window log, advanced streaming, 7118769 silesia.tar, small hash log, advanced streaming, 6587952 silesia.tar, small chain log, advanced streaming, 4943312 @@ -435,6 +577,7 @@ github, level 16 with dict, advanced github, level 19, advanced streaming, 134064 github, level 19 with dict, advanced streaming, 37576 github, no source size, advanced streaming, 136335 +github, no source size with dict, advanced streaming, 41148 github, long distance mode, advanced streaming, 136335 github, multithreaded, advanced streaming, 136335 github, multithreaded long distance mode, advanced streaming, 136335 @@ -446,6 +589,47 @@ github, uncompressed literals, advanced github, uncompressed literals optimal, advanced streaming, 157227 github, huffman literals, advanced streaming, 142465 github, multithreaded with advanced params, advanced streaming, 165915 +github.tar, level -5, advanced streaming, 46747 +github.tar, level -5 with dict, advanced streaming, 43971 +github.tar, level -3, advanced streaming, 43537 +github.tar, level -3 with dict, advanced streaming, 40805 +github.tar, level -1, advanced streaming, 42465 +github.tar, level -1 with dict, advanced streaming, 41122 +github.tar, level 0, advanced streaming, 38441 +github.tar, level 0 with dict, advanced streaming, 37995 +github.tar, level 1, advanced streaming, 39342 +github.tar, level 1 with dict, advanced streaming, 38309 +github.tar, level 3, advanced streaming, 38441 +github.tar, level 3 with dict, advanced streaming, 37995 +github.tar, level 4, advanced streaming, 38467 +github.tar, level 4 with dict, 
advanced streaming, 37948 +github.tar, level 5, advanced streaming, 39788 +github.tar, level 5 with dict, advanced streaming, 39715 +github.tar, level 6, advanced streaming, 39603 +github.tar, level 6 with dict, advanced streaming, 38800 +github.tar, level 7, advanced streaming, 39206 +github.tar, level 7 with dict, advanced streaming, 38071 +github.tar, level 9, advanced streaming, 36717 +github.tar, level 9 with dict, advanced streaming, 36898 +github.tar, level 13, advanced streaming, 35621 +github.tar, level 13 with dict, advanced streaming, 38726 +github.tar, level 16, advanced streaming, 40255 +github.tar, level 16 with dict, advanced streaming, 33639 +github.tar, level 19, advanced streaming, 32837 +github.tar, level 19 with dict, advanced streaming, 32895 +github.tar, no source size, advanced streaming, 38438 +github.tar, no source size with dict, advanced streaming, 38000 +github.tar, long distance mode, advanced streaming, 39722 +github.tar, multithreaded, advanced streaming, 38441 +github.tar, multithreaded long distance mode, advanced streaming, 39722 +github.tar, small window log, advanced streaming, 199558 +github.tar, small hash log, advanced streaming, 129870 +github.tar, small chain log, advanced streaming, 41669 +github.tar, explicit params, advanced streaming, 41199 +github.tar, uncompressed literals, advanced streaming, 41122 +github.tar, uncompressed literals optimal, advanced streaming, 35388 +github.tar, huffman literals, advanced streaming, 38800 +github.tar, multithreaded with advanced params, advanced streaming, 41122 silesia, level -5, old streaming, 6882505 silesia, level -3, old streaming, 6568376 silesia, level -1, old streaming, 6183403 @@ -511,9 +695,43 @@ github, level 16 with dict, old stre github, level 19, old streaming, 134064 github, level 19 with dict, old streaming, 37576 github, no source size, old streaming, 140632 +github, no source size with dict, old streaming, 40654 github, uncompressed literals, old streaming, 136335 
github, uncompressed literals optimal, old streaming, 134064 github, huffman literals, old streaming, 175568 +github.tar, level -5, old streaming, 46747 +github.tar, level -5 with dict, old streaming, 43971 +github.tar, level -3, old streaming, 43537 +github.tar, level -3 with dict, old streaming, 40805 +github.tar, level -1, old streaming, 42465 +github.tar, level -1 with dict, old streaming, 41122 +github.tar, level 0, old streaming, 38441 +github.tar, level 0 with dict, old streaming, 37995 +github.tar, level 1, old streaming, 39342 +github.tar, level 1 with dict, old streaming, 38309 +github.tar, level 3, old streaming, 38441 +github.tar, level 3 with dict, old streaming, 37995 +github.tar, level 4, old streaming, 38467 +github.tar, level 4 with dict, old streaming, 37948 +github.tar, level 5, old streaming, 39788 +github.tar, level 5 with dict, old streaming, 39715 +github.tar, level 6, old streaming, 39603 +github.tar, level 6 with dict, old streaming, 38800 +github.tar, level 7, old streaming, 39206 +github.tar, level 7 with dict, old streaming, 38071 +github.tar, level 9, old streaming, 36717 +github.tar, level 9 with dict, old streaming, 36898 +github.tar, level 13, old streaming, 35621 +github.tar, level 13 with dict, old streaming, 38726 +github.tar, level 16, old streaming, 40255 +github.tar, level 16 with dict, old streaming, 33639 +github.tar, level 19, old streaming, 32837 +github.tar, level 19 with dict, old streaming, 32895 +github.tar, no source size, old streaming, 38438 +github.tar, no source size with dict, old streaming, 38000 +github.tar, uncompressed literals, old streaming, 38441 +github.tar, uncompressed literals optimal, old streaming, 32837 +github.tar, huffman literals, old streaming, 42465 silesia, level -5, old streaming advanced, 6882505 silesia, level -3, old streaming advanced, 6568376 silesia, level -1, old streaming advanced, 6183403 @@ -595,6 +813,7 @@ github, level 16 with dict, old stre github, level 19, old streaming 
advanced, 134064 github, level 19 with dict, old streaming advanced, 37576 github, no source size, old streaming advanced, 140632 +github, no source size with dict, old streaming advanced, 40608 github, long distance mode, old streaming advanced, 141104 github, multithreaded, old streaming advanced, 141104 github, multithreaded long distance mode, old streaming advanced, 141104 @@ -606,6 +825,47 @@ github, uncompressed literals, old stre github, uncompressed literals optimal, old streaming advanced, 134064 github, huffman literals, old streaming advanced, 181108 github, multithreaded with advanced params, old streaming advanced, 141104 +github.tar, level -5, old streaming advanced, 46747 +github.tar, level -5 with dict, old streaming advanced, 44824 +github.tar, level -3, old streaming advanced, 43537 +github.tar, level -3 with dict, old streaming advanced, 41800 +github.tar, level -1, old streaming advanced, 42465 +github.tar, level -1 with dict, old streaming advanced, 41471 +github.tar, level 0, old streaming advanced, 38441 +github.tar, level 0 with dict, old streaming advanced, 38013 +github.tar, level 1, old streaming advanced, 39342 +github.tar, level 1 with dict, old streaming advanced, 38940 +github.tar, level 3, old streaming advanced, 38441 +github.tar, level 3 with dict, old streaming advanced, 38013 +github.tar, level 4, old streaming advanced, 38467 +github.tar, level 4 with dict, old streaming advanced, 38063 +github.tar, level 5, old streaming advanced, 39788 +github.tar, level 5 with dict, old streaming advanced, 39310 +github.tar, level 6, old streaming advanced, 39603 +github.tar, level 6 with dict, old streaming advanced, 39279 +github.tar, level 7, old streaming advanced, 39206 +github.tar, level 7 with dict, old streaming advanced, 38728 +github.tar, level 9, old streaming advanced, 36717 +github.tar, level 9 with dict, old streaming advanced, 36504 +github.tar, level 13, old streaming advanced, 35621 +github.tar, level 13 with dict, old 
streaming advanced, 36035 +github.tar, level 16, old streaming advanced, 40255 +github.tar, level 16 with dict, old streaming advanced, 38736 +github.tar, level 19, old streaming advanced, 32837 +github.tar, level 19 with dict, old streaming advanced, 32876 +github.tar, no source size, old streaming advanced, 38438 +github.tar, no source size with dict, old streaming advanced, 38015 +github.tar, long distance mode, old streaming advanced, 38441 +github.tar, multithreaded, old streaming advanced, 38441 +github.tar, multithreaded long distance mode, old streaming advanced, 38441 +github.tar, small window log, old streaming advanced, 199561 +github.tar, small hash log, old streaming advanced, 129870 +github.tar, small chain log, old streaming advanced, 41669 +github.tar, explicit params, old streaming advanced, 41199 +github.tar, uncompressed literals, old streaming advanced, 38441 +github.tar, uncompressed literals optimal, old streaming advanced, 32837 +github.tar, huffman literals, old streaming advanced, 42465 +github.tar, multithreaded with advanced params, old streaming advanced, 38441 github, level -5 with dict, old streaming cdcit, 46718 github, level -3 with dict, old streaming cdcit, 45395 github, level -1 with dict, old streaming cdcit, 43170 @@ -620,6 +880,22 @@ github, level 9 with dict, old stre github, level 13 with dict, old streaming cdcit, 39743 github, level 16 with dict, old streaming cdcit, 37577 github, level 19 with dict, old streaming cdcit, 37576 +github, no source size with dict, old streaming cdcit, 40654 +github.tar, level -5 with dict, old streaming cdcit, 45018 +github.tar, level -3 with dict, old streaming cdcit, 41886 +github.tar, level -1 with dict, old streaming cdcit, 41636 +github.tar, level 0 with dict, old streaming cdcit, 37956 +github.tar, level 1 with dict, old streaming cdcit, 38766 +github.tar, level 3 with dict, old streaming cdcit, 37956 +github.tar, level 4 with dict, old streaming cdcit, 37927 +github.tar, level 5 with 
dict, old streaming cdcit, 39209 +github.tar, level 6 with dict, old streaming cdcit, 38983 +github.tar, level 7 with dict, old streaming cdcit, 38584 +github.tar, level 9 with dict, old streaming cdcit, 36363 +github.tar, level 13 with dict, old streaming cdcit, 36372 +github.tar, level 16 with dict, old streaming cdcit, 39353 +github.tar, level 19 with dict, old streaming cdcit, 32676 +github.tar, no source size with dict, old streaming cdcit, 38000 github, level -5 with dict, old streaming advanced cdict, 49562 github, level -3 with dict, old streaming advanced cdict, 44956 github, level -1 with dict, old streaming advanced cdict, 42383 @@ -634,3 +910,19 @@ github, level 9 with dict, old stre github, level 13 with dict, old streaming advanced cdict, 39731 github, level 16 with dict, old streaming advanced cdict, 40789 github, level 19 with dict, old streaming advanced cdict, 37576 +github, no source size with dict, old streaming advanced cdict, 40608 +github.tar, level -5 with dict, old streaming advanced cdict, 44307 +github.tar, level -3 with dict, old streaming advanced cdict, 41359 +github.tar, level -1 with dict, old streaming advanced cdict, 41322 +github.tar, level 0 with dict, old streaming advanced cdict, 38013 +github.tar, level 1 with dict, old streaming advanced cdict, 39002 +github.tar, level 3 with dict, old streaming advanced cdict, 38013 +github.tar, level 4 with dict, old streaming advanced cdict, 38063 +github.tar, level 5 with dict, old streaming advanced cdict, 39310 +github.tar, level 6 with dict, old streaming advanced cdict, 39279 +github.tar, level 7 with dict, old streaming advanced cdict, 38728 +github.tar, level 9 with dict, old streaming advanced cdict, 36504 +github.tar, level 13 with dict, old streaming advanced cdict, 36035 +github.tar, level 16 with dict, old streaming advanced cdict, 38736 +github.tar, level 19 with dict, old streaming advanced cdict, 32876 +github.tar, no source size with dict, old streaming advanced cdict, 
38015 diff --git a/tests/regression/test.c b/tests/regression/test.c index ff2cdba3063..5e28d4cd9da 100644 --- a/tests/regression/test.c +++ b/tests/regression/test.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Facebook, Inc. + * Copyright (c) 2016-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/roundTripCrash.c b/tests/roundTripCrash.c index c117d2c268a..9e3b8614dce 100644 --- a/tests/roundTripCrash.c +++ b/tests/roundTripCrash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/seqgen.c b/tests/seqgen.c index 29c0c40545a..3461522e228 100644 --- a/tests/seqgen.c +++ b/tests/seqgen.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, Facebook, Inc. + * Copyright (c) 2017-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/seqgen.h b/tests/seqgen.h index 808099ba9fb..e4948ea38a9 100644 --- a/tests/seqgen.h +++ b/tests/seqgen.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, Facebook, Inc. + * Copyright (c) 2017-2021, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/test-license.py b/tests/test-license.py index 522884dba31..a7e2ba4e37e 100755 --- a/tests/test-license.py +++ b/tests/test-license.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # ################################################################ -# Copyright (c) 2016-2020, Facebook, Inc. +# Copyright (c) 2016-2021, Facebook, Inc. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the @@ -138,4 +138,4 @@ def main(): return len(invalid_files) if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file + sys.exit(main()) diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index fa217175271..4b2dc6a3da5 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -2,7 +2,7 @@ """Test zstd interoperability between versions""" # ################################################################ -# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. +# Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/zbufftest.c b/tests/zbufftest.c index cd3706af442..557e622592f 100644 --- a/tests/zbufftest.c +++ b/tests/zbufftest.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index fa18ea4b472..688fec6876e 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile index d74c41bc4b8..6fd5ac3bbec 100644 --- a/zlibWrapper/Makefile +++ b/zlibWrapper/Makefile @@ -6,7 +6,7 @@ # Paths to static and dynamic zlib and zstd libraries -# Use "make ZLIB_PATH=path/to/zlib ZLIB_LIBRARY=path/to/libz.a" to select a path to library +# Use "make ZLIB_PATH=path/to/zlib ZLIB_LIBRARY=path/to/libz.so" to select a path to library ZLIB_LIBRARY ?= -lz ZLIB_PATH ?= . 
@@ -18,7 +18,8 @@ EXAMPLE_PATH = examples PROGRAMS_PATH = ../programs TEST_FILE = ../doc/zstd_compression_format.md -VPATH = $(PROGRAMS_PATH) +vpath %.c $(PROGRAMS_PATH) $(EXAMPLE_PATH) $(ZLIBWRAPPER_PATH) + CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I$(ZLIB_PATH) -I$(PROGRAMS_PATH) \ -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH) @@ -28,7 +29,9 @@ DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum \ -Wdeclaration-after-statement -Wstrict-prototypes -Wundef \ -Wstrict-aliasing=1 CFLAGS ?= -O3 -CFLAGS += $(STDFLAGS) $(DEBUGFLAGS) $(MOREFLAGS) +CFLAGS += $(STDFLAGS) $(DEBUGFLAGS) +CPPFLAGS += $(MOREFLAGS) +LDLIBS += $(ZLIB_LIBRARY) # Define *.exe as extension for Windows systems ifneq (,$(filter Windows%,$(OS))) @@ -61,7 +64,7 @@ test: example fitblk example_zstd fitblk_zstd zwrapbench minigzip minigzip_zstd ./minigzip_zstd -d example$(EXT).gz @echo ---- minigzip end ---- ./zwrapbench -qi1b3B1K $(TEST_FILE) - ./zwrapbench -rqi1b1e5 ../lib ../programs ../tests + ./zwrapbench -rqi1b1e3 ../lib #valgrindTest: ZSTDLIBRARY = $(ZSTDLIBDIR)/libzstd.so valgrindTest: VALGRIND = LD_LIBRARY_PATH=$(ZSTDLIBDIR) valgrind --track-origins=yes --leak-check=full --error-exitcode=1 @@ -79,35 +82,32 @@ valgrindTest: clean example fitblk example_zstd fitblk_zstd zwrapbench #.c.o: # $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ -minigzip: $(EXAMPLE_PATH)/minigzip.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZSTDLIBRARY) $(ZLIB_LIBRARY) -o $@ +minigzip: minigzip.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) -minigzip_zstd: $(EXAMPLE_PATH)/minigzip.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZSTDLIBRARY) $(ZLIB_LIBRARY) -o $@ +minigzip_zstd: minigzip.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) + $(LINK.o) $^ $(LDLIBS) $(OUTPUT_OPTION) -example: $(EXAMPLE_PATH)/example.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) 
$^ $(ZLIB_LIBRARY) -o $@ +example: example.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) -example_zstd: $(EXAMPLE_PATH)/example.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@ +example_zstd: example.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) + $(LINK.o) $^ $(LDLIBS) $(OUTPUT_OPTION) -fitblk: $(EXAMPLE_PATH)/fitblk.o zstd_zlibwrapper.o $(ZSTDLIBRARY) - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@ +fitblk: fitblk.o zstd_zlibwrapper.o $(ZSTDLIBRARY) -fitblk_zstd: $(EXAMPLE_PATH)/fitblk.o zstdTurnedOn_zlibwrapper.o $(ZSTDLIBRARY) - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@ +fitblk_zstd: fitblk.o zstdTurnedOn_zlibwrapper.o $(ZSTDLIBRARY) + $(LINK.o) $^ $(LDLIBS) $(OUTPUT_OPTION) -zwrapbench: $(EXAMPLE_PATH)/zwrapbench.o zstd_zlibwrapper.o util.o timefn.o datagen.o $(ZSTDLIBRARY) - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@ +zwrapbench: zwrapbench.o zstd_zlibwrapper.o util.o timefn.o datagen.o $(ZSTDLIBRARY) -zstd_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.h +zstd_zlibwrapper.o: zstd_zlibwrapper.h zstdTurnedOn_zlibwrapper.o: CPPFLAGS += -DZWRAP_USE_ZSTD=1 -zstdTurnedOn_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.h - $(CC) $(CPPFLAGS) $(CFLAGS) $< -c -o $@ +zstdTurnedOn_zlibwrapper.o: zstd_zlibwrapper.c zstd_zlibwrapper.h + $(COMPILE.c) $< $(OUTPUT_OPTION) + -$(ZSTDLIBDIR)/libzstd.a: +$(ZSTDLIBRARY): $(MAKE) -C $(ZSTDLIBDIR) libzstd.a $(ZSTDLIBDIR)/libzstd.so: diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c index f30cad40c71..127f6114bb7 100644 --- a/zlibWrapper/examples/zwrapbench.c +++ b/zlibWrapper/examples/zwrapbench.c @@ -270,8 +270,10 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize, do { U32 blockNb; for (blockNb=0; blockNb totalInBytes == 0) { if (zwc->comprState == 
ZWRAP_useReset) { - size_t const resetErr = ZSTD_resetCStream(zwc->zbc, (flush == Z_FINISH) ? strm->avail_in : zwc->pledgedSrcSize); + size_t resetErr = ZSTD_CCtx_reset(zwc->zbc, ZSTD_reset_session_only); if (ZSTD_isError(resetErr)) { - LOG_WRAPPERC("ERROR: ZSTD_resetCStream errorCode=%s\n", + LOG_WRAPPERC("ERROR: ZSTD_CCtx_reset errorCode=%s\n", + ZSTD_getErrorName(resetErr)); + return ZWRAPC_finishWithError(zwc, strm, 0); + } + resetErr = ZSTD_CCtx_setPledgedSrcSize(zwc->zbc, (flush == Z_FINISH) ? strm->avail_in : zwc->pledgedSrcSize); + if (ZSTD_isError(resetErr)) { + LOG_WRAPPERC("ERROR: ZSTD_CCtx_setPledgedSrcSize errorCode=%s\n", ZSTD_getErrorName(resetErr)); return ZWRAPC_finishWithError(zwc, strm, 0); } @@ -829,7 +835,7 @@ ZEXTERN int ZEXPORT z_inflate OF((z_streamp strm, int flush)) goto error; } } else { - size_t const resetErr = ZSTD_resetDStream(zwd->zbd); + size_t const resetErr = ZSTD_DCtx_reset(zwd->zbd, ZSTD_reset_session_only); if (ZSTD_isError(resetErr)) goto error; } } else { @@ -849,7 +855,7 @@ ZEXTERN int ZEXPORT z_inflate OF((z_streamp strm, int flush)) goto error; } } else { - size_t const resetErr = ZSTD_resetDStream(zwd->zbd); + size_t const resetErr = ZSTD_DCtx_reset(zwd->zbd, ZSTD_reset_session_only); if (ZSTD_isError(resetErr)) goto error; } diff --git a/zlibWrapper/zstd_zlibwrapper.h b/zlibWrapper/zstd_zlibwrapper.h index e791043e1d0..042ab9f84fd 100644 --- a/zlibWrapper/zstd_zlibwrapper.h +++ b/zlibWrapper/zstd_zlibwrapper.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the