diff --git a/CHANGELOG b/CHANGELOG index 23c0128203f..afb80ed9ea7 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,6 +9,7 @@ lib: accept dictionaries with partial literal tables, by @terrelln lib: fix CCtx size estimation with external sequence producer, by @embg lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt +lib: fix several bugs in magicless-format decoding, by @embg cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973 cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte diff --git a/doc/decompressor_errata.md b/doc/decompressor_errata.md index 83d4071cb4d..b570f73145d 100644 --- a/doc/decompressor_errata.md +++ b/doc/decompressor_errata.md @@ -125,3 +125,24 @@ The total `Block_Content` is `5` bytes, and `Last_Table_Offset` is `2`. See the compressor workaround code: https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682 + +Magicless format +---------------------- + +**Last affected version**: v1.5.5 + +**Affected decompressor component(s)**: Library + +**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232) + +**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59` + +v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames. +These include but are not limited to: +* Valid frames that happen to begin with a legacy magic number (little-endian) +* Valid frames that happen to begin with a skippable magic number (little-endian) + +If you are affected by this issue and cannot update to v1.5.6 or later, there is a +workaround to recover affected data. Simply prepend the ZSTD magic number +`0xFD2FB528` (little-endian) to your data and decompress using the standard-format +decoder. diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index ee2cda3b639..2f03cf7b0c7 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1085,7 +1085,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, while (srcSize >= ZSTD_startingInputLength(dctx->format)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) { + if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) { size_t decodedSize; size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); if (ZSTD_isError(frameSize)) return frameSize; @@ -1115,7 +1115,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, } #endif - if (srcSize >= 4) { + if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) { U32 const magicNumber = MEM_readLE32(src); DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { @@ -1412,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_decodeSkippableHeader: assert(src != NULL); assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + assert(dctx->format != ZSTD_f_zstd1_magicless); ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ dctx->stage = ZSTDds_skipFrame; @@ -2209,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB DEBUGLOG(4, "Consume header"); FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); - if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + if (zds->format == ZSTD_f_zstd1 + && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->stage = ZSTDds_skipFrame; } else { diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index f96adcfaea7..2f5a25fb114 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -124,7 +124,8 @@ FUZZ_TARGETS := \ sequence_compression_api \ seekable_roundtrip \ huf_round_trip \ - huf_decompress + huf_decompress \ + decompress_cross_format all: libregression.a $(FUZZ_TARGETS) @@ -238,6 +239,9 @@ huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +decompress_cross_format: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o $(LIB_FUZZING_ENGINE) -o $@ libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o diff --git a/tests/fuzz/decompress_cross_format.c b/tests/fuzz/decompress_cross_format.c new file mode 100644 index 00000000000..78461e697b1 --- /dev/null +++ b/tests/fuzz/decompress_cross_format.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +// This fuzz target validates decompression of magicless-format compressed data. + +#include +#include +#include +#include +#include "fuzz_helpers.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#include "fuzz_data_producer.h" + +static ZSTD_DCtx *dctx = NULL; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + // Give a random portion of src data to the producer, to use for parameter generation. + // The rest will be interpreted as magicless compressed data. + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size_t magiclessSize = FUZZ_dataProducer_reserveDataPrefix(producer); + const void* const magiclessSrc = src; + size_t const dstSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); + void* const standardDst = FUZZ_malloc(dstSize); + void* const magiclessDst = FUZZ_malloc(dstSize); + + // Create standard-format src from magicless-format src + const uint32_t zstd_magic = ZSTD_MAGICNUMBER; + size_t standardSize = sizeof(zstd_magic) + magiclessSize; + void* const standardSrc = FUZZ_malloc(standardSize); + memcpy(standardSrc, &zstd_magic, sizeof(zstd_magic)); // assume fuzzing on little-endian machine + memcpy(standardSrc + sizeof(zstd_magic), magiclessSrc, magiclessSize); + + // Truncate to a single frame + { + const size_t standardFrameCompressedSize = ZSTD_findFrameCompressedSize(standardSrc, standardSize); + if (ZSTD_isError(standardFrameCompressedSize)) { + goto cleanup_and_return; + } + standardSize = standardFrameCompressedSize; + magiclessSize = standardFrameCompressedSize - sizeof(zstd_magic); + } + + // Create DCtx if needed + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + // Test one-shot decompression + { + FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1)); + const size_t standardRet = ZSTD_decompressDCtx( + dctx, standardDst, dstSize, standardSrc, standardSize); + + FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless)); + const size_t magiclessRet = ZSTD_decompressDCtx( + dctx, magiclessDst, dstSize, magiclessSrc, magiclessSize); + + // Standard accepts => magicless should accept + if (!ZSTD_isError(standardRet)) FUZZ_ZASSERT(magiclessRet); + + // Magicless accepts => standard should accept + // NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy. + if (!ZSTD_isError(magiclessRet)) FUZZ_ZASSERT(standardRet); + + // If both accept, decompressed size and data should match + if (!ZSTD_isError(standardRet) && !ZSTD_isError(magiclessRet)) { + FUZZ_ASSERT(standardRet == magiclessRet); + if (standardRet > 0) { + FUZZ_ASSERT( + memcmp(standardDst, magiclessDst, standardRet) == 0 + ); + } + } + } + + // Test streaming decompression + { + ZSTD_inBuffer standardIn = { standardSrc, standardSize, 0 }; + ZSTD_inBuffer magiclessIn = { magiclessSrc, magiclessSize, 0 }; + ZSTD_outBuffer standardOut = { standardDst, dstSize, 0 }; + ZSTD_outBuffer magiclessOut = { magiclessDst, dstSize, 0 }; + + FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1)); + const size_t standardRet = ZSTD_decompressStream(dctx, &standardOut, &standardIn); + + FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless)); + const size_t magiclessRet = ZSTD_decompressStream(dctx, &magiclessOut, &magiclessIn); + + // Standard accepts => magicless should accept + if (standardRet == 0) FUZZ_ASSERT(magiclessRet == 0); + + // Magicless accepts => standard should accept + // NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy. + if (magiclessRet == 0) FUZZ_ASSERT(standardRet == 0); + + // If both accept, decompressed size and data should match + if (standardRet == 0 && magiclessRet == 0) { + FUZZ_ASSERT(standardOut.pos == magiclessOut.pos); + if (standardOut.pos > 0) { + FUZZ_ASSERT( + memcmp(standardOut.dst, magiclessOut.dst, standardOut.pos) == 0 + ); + } + } + } + +cleanup_and_return: +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + free(standardSrc); + free(standardDst); + free(magiclessDst); + FUZZ_dataProducer_free(producer); + return 0; +} diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index c489b8fa646..f321002a7a0 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -65,6 +65,7 @@ def __init__(self, input_type, frame_type=FrameType.ZSTD): 'seekable_roundtrip': TargetInfo(InputType.RAW_DATA), 'huf_round_trip': TargetInfo(InputType.RAW_DATA), 'huf_decompress': TargetInfo(InputType.RAW_DATA), + 'decompress_cross_format': TargetInfo(InputType.RAW_DATA), } TARGETS = list(TARGET_INFO.keys()) ALL_TARGETS = TARGETS + ['all']