Skip to content

Commit

Permalink
Merge pull request #2518 from facebook/seekTable
Browse files Browse the repository at this point in the history
New direct seekTable access methods
  • Loading branch information
Cyan4973 authored Mar 4, 2021
2 parents 0388054 + 2fa4c8c commit c4d54ab
Show file tree
Hide file tree
Showing 6 changed files with 278 additions and 121 deletions.
10 changes: 5 additions & 5 deletions contrib/seekable_format/tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,23 @@ ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME)
CPPFLAGS += -I../ -I$(ZSTDLIB_PATH) -I$(ZSTDLIB_PATH)/common

CFLAGS ?= -O3
CFLAGS += -g
CFLAGS += -g -Wall -Wextra -Wcast-qual -Wcast-align -Wconversion \
-Wformat=2 -Wstrict-aliasing=1

SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c $(ZSTDLIB)

.PHONY: default clean test

default: seekable_tests
default: test

test: seekable_tests
./seekable_tests

$(ZSTDLIB):
$(MAKE) -C $(ZSTDLIB_PATH) $(ZSTDLIB_NAME)

seekable_tests : seekable_tests.c $(SEEKABLE_OBJS)
seekable_tests : $(SEEKABLE_OBJS)

clean:
@rm -f core *.o tmp* result* *.zst \
@$(RM) core *.o tmp* result* *.zst \
seekable_tests
@echo Cleaning completed
159 changes: 119 additions & 40 deletions contrib/seekable_format/tests/seekable_tests.c
Original file line number Diff line number Diff line change
@@ -1,14 +1,92 @@
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h> // malloc
#include <stdio.h>
#include <assert.h>

#include "zstd_seekable.h"

/* Basic unit tests for zstd seekable format */
int main(int argc, const char** argv)
{
unsigned testNb = 1;
(void)argc; (void)argv;
printf("Beginning zstd seekable format tests...\n");

printf("Test %u - simple round trip: ", testNb++);
{ size_t const inSize = 4000;
void* const inBuffer = malloc(inSize);
assert(inBuffer != NULL);

size_t const seekCapacity = 5000;
void* const seekBuffer = malloc(seekCapacity);
assert(seekBuffer != NULL);
size_t seekSize;

size_t const outCapacity = inSize;
void* const outBuffer = malloc(outCapacity);
assert(outBuffer != NULL);

ZSTD_seekable_CStream* const zscs = ZSTD_seekable_createCStream();
assert(zscs != NULL);

{ size_t const initStatus = ZSTD_seekable_initCStream(zscs, 9, 0 /* checksumFlag */, (unsigned)inSize /* maxFrameSize */);
assert(!ZSTD_isError(initStatus));
}

{ ZSTD_outBuffer outb = { .dst=seekBuffer, .pos=0, .size=seekCapacity };
ZSTD_inBuffer inb = { .src=inBuffer, .pos=0, .size=inSize };

size_t const cStatus = ZSTD_seekable_compressStream(zscs, &outb, &inb);
assert(!ZSTD_isError(cStatus));
assert(inb.pos == inb.size);

size_t const endStatus = ZSTD_seekable_endStream(zscs, &outb);
assert(!ZSTD_isError(endStatus));
seekSize = outb.pos;
}

ZSTD_seekable* const stream = ZSTD_seekable_create();
assert(stream != NULL);
{ size_t const initStatus = ZSTD_seekable_initBuff(stream, seekBuffer, seekSize);
assert(!ZSTD_isError(initStatus)); }

{ size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, outCapacity, 0);
assert(decStatus == inSize); }

/* unit test ZSTD_seekTable functions */
ZSTD_seekTable* const zst = ZSTD_seekTable_create_fromSeekable(stream);
assert(zst != NULL);

unsigned const nbFrames = ZSTD_seekTable_getNumFrames(zst);
assert(nbFrames > 0);

unsigned long long const frame0Offset = ZSTD_seekTable_getFrameCompressedOffset(zst, 0);
assert(frame0Offset == 0);

unsigned long long const content0Offset = ZSTD_seekTable_getFrameDecompressedOffset(zst, 0);
assert(content0Offset == 0);

size_t const cSize = ZSTD_seekTable_getFrameCompressedSize(zst, 0);
assert(!ZSTD_isError(cSize));
assert(cSize <= seekCapacity);

size_t const origSize = ZSTD_seekTable_getFrameDecompressedSize(zst, 0);
assert(origSize == inSize);

unsigned const fo1idx = ZSTD_seekTable_offsetToFrameIndex(zst, 1);
assert(fo1idx == 0);

free(inBuffer);
free(seekBuffer);
free(outBuffer);
ZSTD_seekable_freeCStream(zscs);
ZSTD_seekTable_free(zst);
ZSTD_seekable_free(stream);
}
printf("Success!\n");


printf("Test %u - check that seekable decompress does not hang: ", testNb++);
{ /* Github issue #2335 */
const size_t compressed_size = 17;
Expand All @@ -34,20 +112,21 @@ int main(int argc, const char** argv)
const size_t uncompressed_size = 32;
uint8_t uncompressed_data[32];

ZSTD_seekable* stream = ZSTD_seekable_create();
size_t status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size);
if (ZSTD_isError(status)) {
ZSTD_seekable_free(stream);
goto _test_error;
}
ZSTD_seekable* const stream = ZSTD_seekable_create();
assert(stream != NULL);
{ size_t const status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size);
if (ZSTD_isError(status)) {
ZSTD_seekable_free(stream);
goto _test_error;
} }

const size_t offset = 2;
/* Should return an error, but not hang */
status = ZSTD_seekable_decompress(stream, uncompressed_data, uncompressed_size, offset);
if (!ZSTD_isError(status)) {
ZSTD_seekable_free(stream);
goto _test_error;
}
{ const size_t offset = 2;
size_t const status = ZSTD_seekable_decompress(stream, uncompressed_data, uncompressed_size, offset);
if (!ZSTD_isError(status)) {
ZSTD_seekable_free(stream);
goto _test_error;
} }

ZSTD_seekable_free(stream);
}
Expand All @@ -57,34 +136,34 @@ int main(int argc, const char** argv)
{ /* Github issue #FIXME */
const size_t compressed_size = 27;
const uint8_t compressed_data[27] = {
'\x28',
'\xb5',
'\x2f',
'\xfd',
'\x00',
'\x32',
'\x91',
'\x00',
'\x00',
'\x00',
'\x5e',
'\x2a',
'\x4d',
'\x18',
'\x09',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\xb1',
'\xea',
'\x92',
'\x8f',
};
(uint8_t)'\x28',
(uint8_t)'\xb5',
(uint8_t)'\x2f',
(uint8_t)'\xfd',
(uint8_t)'\x00',
(uint8_t)'\x32',
(uint8_t)'\x91',
(uint8_t)'\x00',
(uint8_t)'\x00',
(uint8_t)'\x00',
(uint8_t)'\x5e',
(uint8_t)'\x2a',
(uint8_t)'\x4d',
(uint8_t)'\x18',
(uint8_t)'\x09',
(uint8_t)'\x00',
(uint8_t)'\x00',
(uint8_t)'\x00',
(uint8_t)'\x00',
(uint8_t)'\x00',
(uint8_t)'\x00',
(uint8_t)'\x00',
(uint8_t)'\x00',
(uint8_t)'\xb1',
(uint8_t)'\xea',
(uint8_t)'\x92',
(uint8_t)'\x8f',
};
const size_t uncompressed_size = 400;
uint8_t uncompressed_data[400];

Expand Down
45 changes: 38 additions & 7 deletions contrib/seekable_format/zstd_seekable.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ extern "C" {

typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream;
typedef struct ZSTD_seekable_s ZSTD_seekable;
typedef struct ZSTD_seekTable_s ZSTD_seekTable;

/*-****************************************************************************
* Seekable compression - HowTo
Expand Down Expand Up @@ -107,6 +108,7 @@ ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl);
ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum);
ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output);


/*-****************************************************************************
* Seekable decompression - HowTo
* A ZSTD_seekable object is required to tracking the seekTable.
Expand Down Expand Up @@ -161,13 +163,42 @@ ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t
ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex);

#define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2)
/*===== Seek Table access functions =====*/
ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs);
ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long offset);
/*===== Seekable seek table access functions =====*/
ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs);
ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);
ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);
ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);
ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);
ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset);


/*-****************************************************************************
* Direct exploitation of the seekTable
*
* Memory constrained use cases that manage multiple archives
* benefit from retaining multiple archive seek tables
* without retaining a ZSTD_seekable instance for each.
*
* Below API allow the above-mentioned use cases
* to initialize a ZSTD_seekable, extract its (smaller) ZSTD_seekTable,
* then throw the ZSTD_seekable away to save memory.
*
* Standard ZSTD operations can then be used
* to decompress frames based on seek table offsets.
******************************************************************************/

/*===== Independent seek table management =====*/
ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs);
ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st);

/*===== Direct seek table access functions =====*/
ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st);
ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long offset);


/*===== Seekable advanced I/O API =====*/
typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n);
Expand Down
Loading

0 comments on commit c4d54ab

Please sign in to comment.