From 9b8f337357aaf1cb3dde8ed608bac20cc23d2fc1 Mon Sep 17 00:00:00 2001 From: Mark Dittmer Date: Thu, 7 May 2020 09:31:43 -0400 Subject: [PATCH 01/10] [contrib] Support seek table-only API Memory constrained use cases that manage multiple archives benefit from retaining multiple archive seek tables without retaining a ZSTD_seekable instance for each. * New opaque type for seek table: ZSTD_seekTable. * ZSTD_seekable_copySeekTable() supports copying seek table out of a ZSTD_seekable. * ZSTD_seekTable_[eachSeekTableOp]() defines seek table API that mirrors existing seek table operations. * Existing ZSTD_seekable_[eachSeekTableOp]() retained; they delegate to ZSTD_seekTable the variant. These changes allow the above-mentioned use cases to initialize a ZSTD_seekable, extract its ZSTD_seekTable, then throw the ZSTD_seekable away to save memory. Standard ZSTD operations can then be used to decompress frames based on seek table offsets. The copy and delegate patterns are intended to minimize impact on existing code and clients. Using copy instead of move for the infrequent operation extracting a seek table ensures that the extraction does not render the ZSTD_seekable useless. Delegating to *new* seek table-oriented APIs ensures that this is not a breaking change for existing clients while supporting all meaningful operations that depend only on seek table data. --- contrib/seekable_format/zstd_seekable.h | 15 ++- contrib/seekable_format/zstdseek_decompress.c | 105 ++++++++++++++---- 2 files changed, 99 insertions(+), 21 deletions(-) diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h index 7ffd1ba0a72..c02f097c497 100644 --- a/contrib/seekable_format/zstd_seekable.h +++ b/contrib/seekable_format/zstd_seekable.h @@ -29,6 +29,7 @@ extern "C" { typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream; typedef struct ZSTD_seekable_s ZSTD_seekable; +typedef struct ZSTD_seekTable_s ZSTD_seekTable; /*-**************************************************************************** * Seekable compression - HowTo @@ -154,6 +155,10 @@ ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffe ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void); ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs); +/*===== Independent seek table management =====*/ +ZSTDLIB_API size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out); +ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st); + /*===== Seekable decompression functions =====*/ ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src); @@ -161,7 +166,7 @@ ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex); #define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2) -/*===== Seek Table access functions =====*/ +/*===== Seekable seek table access functions =====*/ ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs); ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); @@ -169,6 +174,14 @@ ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex); ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long offset); +/*===== Direct seek table access functions =====*/ +ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(ZSTD_seekTable* const st); +ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex); +ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(ZSTD_seekTable* const st, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(ZSTD_seekTable* const st, unsigned frameIndex); +ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(ZSTD_seekTable* const st, unsigned long long offset); + /*===== Seekable advanced I/O API =====*/ typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n); typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin); diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index abfd1e90271..20cde287950 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -142,18 +142,18 @@ typedef struct { U32 checksum; } seekEntry_t; -typedef struct { +struct ZSTD_seekTable_s { seekEntry_t* entries; size_t tableLen; int checksumFlag; -} seekTable_t; +}; #define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX struct ZSTD_seekable_s { ZSTD_DStream* dstream; - seekTable_t seekTable; + ZSTD_seekTable seekTable; ZSTD_seekable_customFile src; U64 decompressedOffset; @@ -197,23 +197,63 @@ size_t ZSTD_seekable_free(ZSTD_seekable* zs) return 0; } +size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out) +{ + ZSTD_seekTable* st = malloc(sizeof(ZSTD_seekTable)); + if (!st) { + free(st); + return ERROR(memory_allocation); + } + + st->checksumFlag = zs->seekTable.checksumFlag; + st->tableLen = zs->seekTable.tableLen; + + /* Allocate an extra entry at the end to match logic of initial allocation */ + size_t entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1); + seekEntry_t* entries = (seekEntry_t*)malloc(entriesSize); + if (!entries) { + free(entries); + return ERROR(memory_allocation); + } + + memcpy(entries, zs->seekTable.entries, entriesSize); + st->entries = entries; + + *out = st; + return 0; +} + +size_t ZSTD_seekTable_free(ZSTD_seekTable* st) +{ + if (st == NULL) return 0; /* support free on null */ + free(st->entries); + free(st); + + return 0; +} + /** ZSTD_seekable_offsetToFrameIndex() : * Performs a binary search to find the last frame with a decompressed offset * <= pos * @return : the frame's index */ unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos) +{ + return ZSTD_seekTable_offsetToFrameIndex(&zs->seekTable, pos); +} + +unsigned ZSTD_seekTable_offsetToFrameIndex(ZSTD_seekTable* const st, unsigned long long pos) { U32 lo = 0; - U32 hi = (U32)zs->seekTable.tableLen; - assert(zs->seekTable.tableLen <= UINT_MAX); + U32 hi = (U32)st->tableLen; + assert(st->tableLen <= UINT_MAX); - if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) { - return (U32)zs->seekTable.tableLen; + if (pos >= st->entries[st->tableLen].dOffset) { + return (U32)st->tableLen; } while (lo + 1 < hi) { U32 const mid = lo + ((hi - lo) >> 1); - if (zs->seekTable.entries[mid].dOffset <= pos) { + if (st->entries[mid].dOffset <= pos) { lo = mid; } else { hi = mid; @@ -224,34 +264,59 @@ unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs) { - assert(zs->seekTable.tableLen <= UINT_MAX); - return (unsigned)zs->seekTable.tableLen; + return ZSTD_seekTable_getNumFrames(&zs->seekTable); +} + +unsigned ZSTD_seekTable_getNumFrames(ZSTD_seekTable* const st) +{ + assert(st->tableLen <= UINT_MAX); + return (unsigned)st->tableLen; } unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex) { - if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; - return zs->seekTable.entries[frameIndex].cOffset; + return ZSTD_seekTable_getFrameCompressedOffset(&zs->seekTable, frameIndex); +} + +unsigned long long ZSTD_seekTable_getFrameCompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex) +{ + if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; + return st->entries[frameIndex].cOffset; } unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex) { - if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; - return zs->seekTable.entries[frameIndex].dOffset; + return ZSTD_seekTable_getFrameDecompressedOffset(&zs->seekTable, frameIndex); +} + +unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex) +{ + if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; + return st->entries[frameIndex].dOffset; } size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex) { - if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge); - return zs->seekTable.entries[frameIndex + 1].cOffset - - zs->seekTable.entries[frameIndex].cOffset; + return ZSTD_seekTable_getFrameCompressedSize(&zs->seekTable, frameIndex); +} + +size_t ZSTD_seekTable_getFrameCompressedSize(ZSTD_seekTable* const st, unsigned frameIndex) +{ + if (frameIndex >= st->tableLen) return ERROR(frameIndex_tooLarge); + return st->entries[frameIndex + 1].cOffset - + st->entries[frameIndex].cOffset; } size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex) { - if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge); - return zs->seekTable.entries[frameIndex + 1].dOffset - - zs->seekTable.entries[frameIndex].dOffset; + return ZSTD_seekTable_getFrameDecompressedSize(&zs->seekTable, frameIndex); +} + +size_t ZSTD_seekTable_getFrameDecompressedSize(ZSTD_seekTable* const st, unsigned frameIndex) +{ + if (frameIndex > st->tableLen) return ERROR(frameIndex_tooLarge); + return st->entries[frameIndex + 1].dOffset - + st->entries[frameIndex].dOffset; } static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs) From a80b10f5e6de80c2f0ae520c08eeb27c217716ad Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 2 Mar 2021 15:03:37 -0800 Subject: [PATCH 02/10] fix potential leak on exit --- contrib/seekable_format/zstdseek_decompress.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index 9bc7dd21de0..749d6cd9fa2 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -195,26 +195,22 @@ size_t ZSTD_seekable_free(ZSTD_seekable* zs) ZSTD_freeDStream(zs->dstream); free(zs->seekTable.entries); free(zs); - return 0; } size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out) { - ZSTD_seekTable* st = malloc(sizeof(ZSTD_seekTable)); - if (!st) { - free(st); - return ERROR(memory_allocation); - } + ZSTD_seekTable* const st = malloc(sizeof(ZSTD_seekTable)); + if (st==NULL) return ERROR(memory_allocation); st->checksumFlag = zs->seekTable.checksumFlag; st->tableLen = zs->seekTable.tableLen; /* Allocate an extra entry at the end to match logic of initial allocation */ size_t entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1); - seekEntry_t* entries = (seekEntry_t*)malloc(entriesSize); - if (!entries) { - free(entries); + seekEntry_t* const entries = (seekEntry_t*)malloc(entriesSize); + if (entries==NULL) { + free(st); return ERROR(memory_allocation); } @@ -230,7 +226,6 @@ size_t ZSTD_seekTable_free(ZSTD_seekTable* st) if (st == NULL) return 0; /* support free on null */ free(st->entries); free(st); - return 0; } From c7e42e147b422366f266ec32a9b9e325293b02ce Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 2 Mar 2021 15:24:30 -0800 Subject: [PATCH 03/10] fixed const guarantees read-only objects are properly const-ified in parameters --- contrib/seekable_format/zstd_seekable.h | 35 ++++++++++--------- contrib/seekable_format/zstdseek_decompress.c | 29 ++++++++------- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h index c02f097c497..bf0118a3462 100644 --- a/contrib/seekable_format/zstd_seekable.h +++ b/contrib/seekable_format/zstd_seekable.h @@ -108,6 +108,7 @@ ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl); ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum); ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output); + /*-**************************************************************************** * Seekable decompression - HowTo * A ZSTD_seekable object is required to tracking the seekTable. @@ -155,10 +156,6 @@ ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffe ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void); ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs); -/*===== Independent seek table management =====*/ -ZSTDLIB_API size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out); -ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st); - /*===== Seekable decompression functions =====*/ ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src); @@ -167,20 +164,26 @@ ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, s #define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2) /*===== Seekable seek table access functions =====*/ -ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs); -ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); -ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); -ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex); -ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex); -ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long offset); +ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs); +ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex); +ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex); +ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset); + + +/*===== Independent seek table management =====*/ +ZSTDLIB_API size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out); +ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st); /*===== Direct seek table access functions =====*/ -ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(ZSTD_seekTable* const st); -ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex); -ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex); -ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(ZSTD_seekTable* const st, unsigned frameIndex); -ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(ZSTD_seekTable* const st, unsigned frameIndex); -ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(ZSTD_seekTable* const st, unsigned long long offset); +ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st); +ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex); +ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex); +ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long offset); + /*===== Seekable advanced I/O API =====*/ typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n); diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index 749d6cd9fa2..5315e6c3614 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -233,12 +233,12 @@ size_t ZSTD_seekTable_free(ZSTD_seekTable* st) * Performs a binary search to find the last frame with a decompressed offset * <= pos * @return : the frame's index */ -unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos) +unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long pos) { return ZSTD_seekTable_offsetToFrameIndex(&zs->seekTable, pos); } -unsigned ZSTD_seekTable_offsetToFrameIndex(ZSTD_seekTable* const st, unsigned long long pos) +unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long pos) { U32 lo = 0; U32 hi = (U32)st->tableLen; @@ -259,57 +259,57 @@ unsigned ZSTD_seekTable_offsetToFrameIndex(ZSTD_seekTable* const st, unsigned lo return lo; } -unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs) +unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs) { return ZSTD_seekTable_getNumFrames(&zs->seekTable); } -unsigned ZSTD_seekTable_getNumFrames(ZSTD_seekTable* const st) +unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st) { assert(st->tableLen <= UINT_MAX); return (unsigned)st->tableLen; } -unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex) +unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex) { return ZSTD_seekTable_getFrameCompressedOffset(&zs->seekTable, frameIndex); } -unsigned long long ZSTD_seekTable_getFrameCompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex) +unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex) { if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; return st->entries[frameIndex].cOffset; } -unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex) +unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex) { return ZSTD_seekTable_getFrameDecompressedOffset(&zs->seekTable, frameIndex); } -unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex) +unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex) { if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; return st->entries[frameIndex].dOffset; } -size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex) +size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex) { return ZSTD_seekTable_getFrameCompressedSize(&zs->seekTable, frameIndex); } -size_t ZSTD_seekTable_getFrameCompressedSize(ZSTD_seekTable* const st, unsigned frameIndex) +size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex) { if (frameIndex >= st->tableLen) return ERROR(frameIndex_tooLarge); return st->entries[frameIndex + 1].cOffset - st->entries[frameIndex].cOffset; } -size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex) +size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex) { return ZSTD_seekTable_getFrameDecompressedSize(&zs->seekTable, frameIndex); } -size_t ZSTD_seekTable_getFrameDecompressedSize(ZSTD_seekTable* const st, unsigned frameIndex) +size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex) { if (frameIndex > st->tableLen) return ERROR(frameIndex_tooLarge); return st->entries[frameIndex + 1].dOffset - @@ -513,7 +513,7 @@ size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsign zs->in.size = toRead; zs->in.pos = 0; } - } + } /* while (zs->decompressedOffset < offset + len) */ } while (zs->decompressedOffset != offset + len); return len; @@ -525,8 +525,7 @@ size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSiz return ERROR(frameIndex_tooLarge); } - { - size_t const decompressedSize = + { size_t const decompressedSize = zs->seekTable.entries[frameIndex + 1].dOffset - zs->seekTable.entries[frameIndex].dOffset; if (dstSize < decompressedSize) { From 029f974ddc6f3fcfaaa51173b09fdc7bd9dffcc1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 2 Mar 2021 15:43:52 -0800 Subject: [PATCH 04/10] strengthen compilation flags --- contrib/seekable_format/tests/Makefile | 5 +++-- .../seekable_format/tests/seekable_tests.c | 1 + contrib/seekable_format/zstd_seekable.h | 2 +- contrib/seekable_format/zstdseek_decompress.c | 20 +++++++++---------- lib/common/mem.h | 2 +- 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/contrib/seekable_format/tests/Makefile b/contrib/seekable_format/tests/Makefile index b00657f80ad..707d04d551c 100644 --- a/contrib/seekable_format/tests/Makefile +++ b/contrib/seekable_format/tests/Makefile @@ -16,7 +16,8 @@ ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME) CPPFLAGS += -I../ -I$(ZSTDLIB_PATH) -I$(ZSTDLIB_PATH)/common CFLAGS ?= -O3 -CFLAGS += -g +CFLAGS += -g -Wall -Wextra -Wcast-qual -Wcast-align -Wconversion \ + -Wformat=2 -Wstrict-aliasing=1 SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c $(ZSTDLIB) @@ -33,6 +34,6 @@ $(ZSTDLIB): seekable_tests : seekable_tests.c $(SEEKABLE_OBJS) clean: - @rm -f core *.o tmp* result* *.zst \ + @$(RM) core *.o tmp* result* *.zst \ seekable_tests @echo Cleaning completed diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c index f2556b5180b..e9911c08465 100644 --- a/contrib/seekable_format/tests/seekable_tests.c +++ b/contrib/seekable_format/tests/seekable_tests.c @@ -8,6 +8,7 @@ int main(int argc, const char** argv) { unsigned testNb = 1; + (void)argc; (void)argv; printf("Beginning zstd seekable format tests...\n"); printf("Test %u - check that seekable decompress does not hang: ", testNb++); { /* Github issue #2335 */ diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h index bf0118a3462..b7e2d82da4a 100644 --- a/contrib/seekable_format/zstd_seekable.h +++ b/contrib/seekable_format/zstd_seekable.h @@ -173,7 +173,7 @@ ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, u /*===== Independent seek table management =====*/ -ZSTDLIB_API size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out); +ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs); ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st); /*===== Direct seek table access functions =====*/ diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index 5315e6c3614..cf9e3dad5e0 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -118,15 +118,16 @@ static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin) { buffWrapper_t* const buff = (buffWrapper_t*) opaque; unsigned long long newOffset; + assert(offset >= 0); switch (origin) { case SEEK_SET: - newOffset = offset; + newOffset = (unsigned long long)offset; break; case SEEK_CUR: - newOffset = (unsigned long long)buff->pos + offset; + newOffset = (unsigned long long)buff->pos + (unsigned long long)offset; break; case SEEK_END: - newOffset = (unsigned long long)buff->size + offset; + newOffset = (unsigned long long)buff->size + (unsigned long long)offset; break; default: assert(0); /* not possible */ @@ -198,10 +199,10 @@ size_t ZSTD_seekable_free(ZSTD_seekable* zs) return 0; } -size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out) +ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs) { ZSTD_seekTable* const st = malloc(sizeof(ZSTD_seekTable)); - if (st==NULL) return ERROR(memory_allocation); + if (st==NULL) return NULL; st->checksumFlag = zs->seekTable.checksumFlag; st->tableLen = zs->seekTable.tableLen; @@ -211,14 +212,12 @@ size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out) seekEntry_t* const entries = (seekEntry_t*)malloc(entriesSize); if (entries==NULL) { free(st); - return ERROR(memory_allocation); + return NULL; } memcpy(entries, zs->seekTable.entries, entriesSize); st->entries = entries; - - *out = st; - return 0; + return st; } size_t ZSTD_seekTable_free(ZSTD_seekTable* st) @@ -449,8 +448,9 @@ size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsign zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset; zs->curFrame = targetFrame; + assert(zs->seekTable.entries[targetFrame].cOffset < LLONG_MAX); CHECK_IO(zs->src.seek(zs->src.opaque, - zs->seekTable.entries[targetFrame].cOffset, + (long long)zs->seekTable.entries[targetFrame].cOffset, SEEK_SET)); zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0}; XXH64_reset(&zs->xxhState, 0); diff --git a/lib/common/mem.h b/lib/common/mem.h index 9813bfc4235..961f89849dc 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -308,7 +308,7 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) MEM_STATIC U32 MEM_readLE24(const void* memPtr) { - return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); + return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16); } MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) From ac95a304556aa6461aab92968994afa8b8883f40 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 2 Mar 2021 16:03:18 -0800 Subject: [PATCH 05/10] various minor style fixes --- .../seekable_format/tests/seekable_tests.c | 26 +++++++++--------- contrib/seekable_format/zstdseek_compress.c | 19 ++++++------- contrib/seekable_format/zstdseek_decompress.c | 27 +++++++------------ 3 files changed, 32 insertions(+), 40 deletions(-) diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c index e9911c08465..658b356dfac 100644 --- a/contrib/seekable_format/tests/seekable_tests.c +++ b/contrib/seekable_format/tests/seekable_tests.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "zstd_seekable.h" @@ -35,20 +36,21 @@ int main(int argc, const char** argv) const size_t uncompressed_size = 32; uint8_t uncompressed_data[32]; - ZSTD_seekable* stream = ZSTD_seekable_create(); - size_t status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size); - if (ZSTD_isError(status)) { - ZSTD_seekable_free(stream); - goto _test_error; - } + ZSTD_seekable* const stream = ZSTD_seekable_create(); + assert(stream != NULL); + { size_t const status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size); + if (ZSTD_isError(status)) { + ZSTD_seekable_free(stream); + goto _test_error; + } } - const size_t offset = 2; /* Should return an error, but not hang */ - status = ZSTD_seekable_decompress(stream, uncompressed_data, uncompressed_size, offset); - if (!ZSTD_isError(status)) { - ZSTD_seekable_free(stream); - goto _test_error; - } + { const size_t offset = 2; + size_t const status = ZSTD_seekable_decompress(stream, uncompressed_data, uncompressed_size, offset); + if (!ZSTD_isError(status)) { + ZSTD_seekable_free(stream); + goto _test_error; + } } ZSTD_seekable_free(stream); } diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c index 3db29188757..f153aee73e0 100644 --- a/contrib/seekable_format/zstdseek_compress.c +++ b/contrib/seekable_format/zstdseek_compress.c @@ -19,6 +19,7 @@ #include "zstd.h" #include "zstd_errors.h" #include "mem.h" + #include "zstd_seekable.h" #define CHECK_Z(f) { size_t const ret = (f); if (ret != 0) return ret; } @@ -75,7 +76,7 @@ size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl) return 0; } -size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl) +static size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl) { if (fl != NULL) free(fl->entries); return 0; @@ -83,7 +84,7 @@ size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl) ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag) { - ZSTD_frameLog* fl = malloc(sizeof(ZSTD_frameLog)); + ZSTD_frameLog* const fl = malloc(sizeof(ZSTD_frameLog)); if (fl == NULL) return NULL; if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(fl))) { @@ -106,10 +107,9 @@ size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl) return 0; } -ZSTD_seekable_CStream* ZSTD_seekable_createCStream() +ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void) { - ZSTD_seekable_CStream* zcs = malloc(sizeof(ZSTD_seekable_CStream)); - + ZSTD_seekable_CStream* const zcs = malloc(sizeof(ZSTD_seekable_CStream)); if (zcs == NULL) return NULL; memset(zcs, 0, sizeof(*zcs)); @@ -134,7 +134,6 @@ size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs) ZSTD_freeCStream(zcs->cstream); ZSTD_seekable_frameLog_freeVec(&zcs->framelog); free(zcs); - return 0; } @@ -152,9 +151,8 @@ size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, return ERROR(frameParameter_unsupported); } - zcs->maxFrameSize = maxFrameSize - ? maxFrameSize - : ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE; + zcs->maxFrameSize = maxFrameSize ? + maxFrameSize : ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE; zcs->framelog.checksumFlag = checksumFlag; if (zcs->framelog.checksumFlag) { @@ -224,8 +222,7 @@ size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output zcs->frameDSize = 0; ZSTD_CCtx_reset(zcs->cstream, ZSTD_reset_session_only); - if (zcs->framelog.checksumFlag) - XXH64_reset(&zcs->xxhState, 0); + if (zcs->framelog.checksumFlag) XXH64_reset(&zcs->xxhState, 0); return 0; } diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index cf9e3dad5e0..6cb9e03fc8b 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -107,7 +107,8 @@ typedef struct { static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n) { - buffWrapper_t* buff = (buffWrapper_t*) opaque; + buffWrapper_t* const buff = (buffWrapper_t*)opaque; + assert(buff != NULL); if (buff->pos + n > buff->size) return -1; memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n); buff->pos += n; @@ -118,6 +119,7 @@ static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin) { buffWrapper_t* const buff = (buffWrapper_t*) opaque; unsigned long long newOffset; + assert(buff != NULL); assert(offset >= 0); switch (origin) { case SEEK_SET: @@ -174,8 +176,7 @@ struct ZSTD_seekable_s { ZSTD_seekable* ZSTD_seekable_create(void) { - ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable)); - + ZSTD_seekable* const zs = malloc(sizeof(ZSTD_seekable)); if (zs == NULL) return NULL; /* also initializes stage to zsds_init */ @@ -208,7 +209,7 @@ ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs) st->tableLen = zs->seekTable.tableLen; /* Allocate an extra entry at the end to match logic of initial allocation */ - size_t entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1); + size_t const entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1); seekEntry_t* const entries = (seekEntry_t*)malloc(entriesSize); if (entries==NULL) { free(st); @@ -244,7 +245,7 @@ unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned lo assert(st->tableLen <= UINT_MAX); if (pos >= st->entries[st->tableLen].dOffset) { - return (U32)st->tableLen; + return (unsigned)st->tableLen; } while (lo + 1 < hi) { @@ -333,8 +334,7 @@ static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs) /* check reserved bits */ if ((checksumFlag >> 2) & 0x1f) { return ERROR(corruption_detected); - } - } + } } { U32 const numFrames = MEM_readLE32(zs->inBuff); U32 const sizePerEntry = 8 + (checksumFlag?4:0); @@ -342,12 +342,9 @@ static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs) U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE; U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */ - { - U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE); - + { U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE); CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END)); CHECK_IO(src.read(src.opaque, zs->inBuff, toRead)); - remaining -= toRead; } @@ -360,19 +357,15 @@ static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs) { /* Allocate an extra entry at the end so that we can do size * computations on the last element without special case */ - seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1)); + seekEntry_t* const entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1)); U32 idx = 0; U32 pos = 8; - U64 cOffset = 0; U64 dOffset = 0; - if (!entries) { - free(entries); - return ERROR(memory_allocation); - } + if (entries == NULL) return ERROR(memory_allocation); /* compute cumulative positions */ for (; idx < numFrames; idx++) { From a1d7b9d654279cbea2a8a27a88b5e43894cf47a1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 3 Mar 2021 15:17:12 -0800 Subject: [PATCH 06/10] fixed gcc conversion warnings --- .../seekable_format/tests/seekable_tests.c | 56 +++++++++---------- contrib/seekable_format/zstdseek_compress.c | 11 ++-- 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c index af851dd6361..7a276bc4cdc 100644 --- a/contrib/seekable_format/tests/seekable_tests.c +++ b/contrib/seekable_format/tests/seekable_tests.c @@ -60,34 +60,34 @@ int main(int argc, const char** argv) { /* Github issue #FIXME */ const size_t compressed_size = 27; const uint8_t compressed_data[27] = { - '\x28', - '\xb5', - '\x2f', - '\xfd', - '\x00', - '\x32', - '\x91', - '\x00', - '\x00', - '\x00', - '\x5e', - '\x2a', - '\x4d', - '\x18', - '\x09', - '\x00', - '\x00', - '\x00', - '\x00', - '\x00', - '\x00', - '\x00', - '\x00', - '\xb1', - '\xea', - '\x92', - '\x8f', - }; + (uint8_t)'\x28', + (uint8_t)'\xb5', + (uint8_t)'\x2f', + (uint8_t)'\xfd', + (uint8_t)'\x00', + (uint8_t)'\x32', + (uint8_t)'\x91', + (uint8_t)'\x00', + (uint8_t)'\x00', + (uint8_t)'\x00', + (uint8_t)'\x5e', + (uint8_t)'\x2a', + (uint8_t)'\x4d', + (uint8_t)'\x18', + (uint8_t)'\x09', + (uint8_t)'\x00', + (uint8_t)'\x00', + (uint8_t)'\x00', + (uint8_t)'\x00', + (uint8_t)'\x00', + (uint8_t)'\x00', + (uint8_t)'\x00', + (uint8_t)'\x00', + (uint8_t)'\xb1', + (uint8_t)'\xea', + (uint8_t)'\x92', + (uint8_t)'\x8f', + }; const size_t uncompressed_size = 400; uint8_t uncompressed_data[400]; diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c index f153aee73e0..442f9bee9ad 100644 --- a/contrib/seekable_format/zstdseek_compress.c +++ b/contrib/seekable_format/zstdseek_compress.c @@ -202,7 +202,7 @@ size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output /* end the frame */ size_t ret = ZSTD_endStream(zcs->cstream, output); - zcs->frameCSize += output->pos - prevOutPos; + zcs->frameCSize += (U32)(output->pos - prevOutPos); /* need to flush before doing the rest */ if (ret) return ret; @@ -245,8 +245,8 @@ size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* XXH64_update(&zcs->xxhState, inBase, inTmp.pos); } - zcs->frameCSize += output->pos - prevOutPos; - zcs->frameDSize += inTmp.pos; + zcs->frameCSize += (U32)(output->pos - prevOutPos); + zcs->frameDSize += (U32)inTmp.pos; input->pos += inTmp.pos; @@ -287,7 +287,7 @@ static inline size_t ZSTD_stwrite32(ZSTD_frameLog* fl, memcpy((BYTE*)output->dst + output->pos, tmp + (fl->seekTablePos - offset), lenWrite); output->pos += lenWrite; - fl->seekTablePos += lenWrite; + fl->seekTablePos += (U32)lenWrite; if (lenWrite < 4) return ZSTD_seekable_seekTableSize(fl) - fl->seekTablePos; } @@ -336,8 +336,7 @@ size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output) if (output->size - output->pos < 1) return seekTableLen - fl->seekTablePos; if (fl->seekTablePos < seekTableLen - 4) { - BYTE sfd = 0; - sfd |= (fl->checksumFlag) << 7; + BYTE const sfd = (BYTE)((fl->checksumFlag) << 7); ((BYTE*)output->dst)[output->pos] = sfd; output->pos++; From 6c0bfc468c2eb27bb9f5ecc62e418f59d22980a7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 3 Mar 2021 15:30:55 -0800 Subject: [PATCH 07/10] fixed wrong assert condition --- contrib/seekable_format/zstdseek_decompress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index 71d70c024d6..748e76a3f6a 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -120,16 +120,16 @@ static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin) buffWrapper_t* const buff = (buffWrapper_t*) opaque; unsigned long long newOffset; assert(buff != NULL); - assert(offset >= 0); switch (origin) { case SEEK_SET: + assert(offset >= 0); newOffset = (unsigned long long)offset; break; case SEEK_CUR: - newOffset = (unsigned long long)buff->pos + (unsigned long long)offset; + newOffset = (unsigned long long)((long long)buff->pos + offset); break; case SEEK_END: - newOffset = (unsigned long long)buff->size + (unsigned long long)offset; + newOffset = (unsigned long long)((long long)buff->size + offset); break; default: assert(0); /* not possible */ From 713d4953f712ddc7477877549a0eaebcd0062714 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 3 Mar 2021 18:00:41 -0800 Subject: [PATCH 08/10] fixed gcc-7 conversion warning --- contrib/seekable_format/zstdseek_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c index 442f9bee9ad..7895e8edc7b 100644 --- a/contrib/seekable_format/zstdseek_compress.c +++ b/contrib/seekable_format/zstdseek_compress.c @@ -71,7 +71,7 @@ size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl) fl->entries = (framelogEntry_t*)malloc( sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY); if (fl->entries == NULL) return ERROR(memory_allocation); - fl->capacity = FRAMELOG_STARTING_CAPACITY; + fl->capacity = (U32)FRAMELOG_STARTING_CAPACITY; return 0; } From 16ec1cf3551edabf8e23a156e0fc517a2a8cc28d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 3 Mar 2021 18:55:34 -0800 Subject: [PATCH 09/10] added test case for seekTable API and simple roundtrip test --- contrib/seekable_format/tests/Makefile | 5 +- .../seekable_format/tests/seekable_tests.c | 76 +++++++++++++++++++ contrib/seekable_format/zstdseek_compress.c | 5 +- 3 files changed, 80 insertions(+), 6 deletions(-) diff --git a/contrib/seekable_format/tests/Makefile b/contrib/seekable_format/tests/Makefile index 707d04d551c..15eadb40e2d 100644 --- a/contrib/seekable_format/tests/Makefile +++ b/contrib/seekable_format/tests/Makefile @@ -22,8 +22,7 @@ CFLAGS += -g -Wall -Wextra -Wcast-qual -Wcast-align -Wconversion \ SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c $(ZSTDLIB) .PHONY: default clean test - -default: seekable_tests +default: test test: seekable_tests ./seekable_tests @@ -31,7 +30,7 @@ test: seekable_tests $(ZSTDLIB): $(MAKE) -C $(ZSTDLIB_PATH) $(ZSTDLIB_NAME) -seekable_tests : seekable_tests.c $(SEEKABLE_OBJS) +seekable_tests : $(SEEKABLE_OBJS) clean: @$(RM) core *.o tmp* result* *.zst \ diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c index 7a276bc4cdc..7a250f39d39 100644 --- a/contrib/seekable_format/tests/seekable_tests.c +++ b/contrib/seekable_format/tests/seekable_tests.c @@ -1,5 +1,6 @@ #include #include +#include // malloc #include #include @@ -11,6 +12,81 @@ int main(int argc, const char** argv) unsigned testNb = 1; (void)argc; (void)argv; printf("Beginning zstd seekable format tests...\n"); + + printf("Test %u - simple round trip: ", testNb++); + { size_t const inSize = 4000; + void* const inBuffer = malloc(inSize); + assert(inBuffer != NULL); + + size_t const seekCapacity = 5000; + void* const seekBuffer = malloc(seekCapacity); + assert(seekBuffer != NULL); + size_t seekSize; + + size_t const outCapacity = inSize; + void* const outBuffer = malloc(outCapacity); + assert(outBuffer != NULL); + + ZSTD_seekable_CStream* const zscs = ZSTD_seekable_createCStream(); + assert(zscs != NULL); + + { size_t const initStatus = ZSTD_seekable_initCStream(zscs, 9, 0 /* checksumFlag */, inSize /* maxFrameSize */); + assert(!ZSTD_isError(initStatus)); + } + + { ZSTD_outBuffer outb = { .dst=seekBuffer, .pos=0, .size=seekCapacity }; + ZSTD_inBuffer inb = { .src=inBuffer, .pos=0, .size=inSize }; + + size_t const cStatus = ZSTD_seekable_compressStream(zscs, &outb, &inb); + assert(!ZSTD_isError(cStatus)); + assert(inb.pos == inb.size); + + size_t const endStatus = ZSTD_seekable_endStream(zscs, &outb); + assert(!ZSTD_isError(endStatus)); + seekSize = outb.pos; + } + + ZSTD_seekable* const stream = ZSTD_seekable_create(); + assert(stream != NULL); + { size_t const initStatus = ZSTD_seekable_initBuff(stream, seekBuffer, seekSize); + assert(!ZSTD_isError(initStatus)); } + + { size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, outCapacity, 0); + assert(decStatus == inSize); } + + /* unit test ZSTD_seekTable functions */ + ZSTD_seekTable* const zst = ZSTD_seekTable_create_fromSeekable(stream); + assert(zst != NULL); + + unsigned const nbFrames = ZSTD_seekTable_getNumFrames(zst); + assert(nbFrames > 0); + + unsigned long long const frame0Offset = ZSTD_seekTable_getFrameCompressedOffset(zst, 0); + assert(frame0Offset == 0); + + unsigned long long const content0Offset = ZSTD_seekTable_getFrameDecompressedOffset(zst, 0); + assert(content0Offset == 0); + + size_t const cSize = ZSTD_seekTable_getFrameCompressedSize(zst, 0); + assert(!ZSTD_isError(cSize)); + assert(cSize <= seekCapacity); + + size_t const origSize = ZSTD_seekTable_getFrameDecompressedSize(zst, 0); + assert(origSize == inSize); + + unsigned const fo1idx = ZSTD_seekTable_offsetToFrameIndex(zst, 1); + assert(fo1idx == 0); + + free(inBuffer); + free(seekBuffer); + free(outBuffer); + ZSTD_seekable_freeCStream(zscs); + ZSTD_seekTable_free(zst); + ZSTD_seekable_free(stream); + } + printf("Success!\n"); + + printf("Test %u - check that seekable decompress does not hang: ", testNb++); { /* Github issue #2335 */ const size_t compressed_size = 17; diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c index 442f9bee9ad..d92917a6250 100644 --- a/contrib/seekable_format/zstdseek_compress.c +++ b/contrib/seekable_format/zstdseek_compress.c @@ -64,15 +64,14 @@ struct ZSTD_seekable_CStream_s { int writingSeekTable; }; -size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl) +static size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl) { /* allocate some initial space */ size_t const FRAMELOG_STARTING_CAPACITY = 16; fl->entries = (framelogEntry_t*)malloc( sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY); if (fl->entries == NULL) return ERROR(memory_allocation); - fl->capacity = FRAMELOG_STARTING_CAPACITY; - + fl->capacity = (U32)FRAMELOG_STARTING_CAPACITY; return 0; } From 2fa4c8c4054b1d38c67c22124f1f09a0dce04f6c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 3 Mar 2021 22:54:04 -0800 Subject: [PATCH 10/10] added code comments for new API ZSTD_seekTable --- contrib/seekable_format/zstd_seekable.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h index b7e2d82da4a..d2807cfbd6f 100644 --- a/contrib/seekable_format/zstd_seekable.h +++ b/contrib/seekable_format/zstd_seekable.h @@ -172,6 +172,21 @@ ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* z ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset); +/*-**************************************************************************** +* Direct exploitation of the seekTable +* +* Memory constrained use cases that manage multiple archives +* benefit from retaining multiple archive seek tables +* without retaining a ZSTD_seekable instance for each. +* +* Below API allow the above-mentioned use cases +* to initialize a ZSTD_seekable, extract its (smaller) ZSTD_seekTable, +* then throw the ZSTD_seekable away to save memory. +* +* Standard ZSTD operations can then be used +* to decompress frames based on seek table offsets. +******************************************************************************/ + /*===== Independent seek table management =====*/ ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs); ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st);