Skip to content

Commit

Permalink
Update with recommendations from Janelia SciCompSoft code review
Browse files Browse the repository at this point in the history
  • Loading branch information
mkitti committed Aug 14, 2024
1 parent d4da095 commit b4714df
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 41 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ dist/

# ignore codec builds
codecs/**/build/
# pixi environments
.pixi
*.egg-info
42 changes: 22 additions & 20 deletions codecs/zstd/zstd_codec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,22 @@ val compress(std::string source, int level)

val decompress(std::string source)
{
// number of bytes to grow the output buffer if more space is needed
const size_t DEST_GROWTH_SIZE = ZSTD_DStreamOutSize();

// setup source buffer
const char *source_ptr = source.c_str();
int source_size = source.size();

ZSTD_DStream * zds = ZSTD_createDStream();
// create and initialize decompression stream / context
// use the streaming API so that we can handle unkown frame content size
ZSTD_DStream *zds = ZSTD_createDStream();

size_t status = ZSTD_initDStream(zds);
if (ZSTD_isError(status)) {
ZSTD_freeDStream(zds);
throw std::runtime_error("zstd codec error: " + std::string(ZSTD_getErrorName(status)));
}

ZSTD_inBuffer input = {
.src = (void*) source.c_str(),
Expand All @@ -38,11 +48,6 @@ val decompress(std::string source)
.pos = 0,
};

if (ZSTD_isError(status)) {
ZSTD_freeDStream(zds);
throw std::runtime_error("zstd codec error: " + std::string(ZSTD_getErrorName(status)));
}

// setup destination buffer
unsigned long long dest_size = ZSTD_getFrameContentSize(source_ptr, source_size);

Expand All @@ -53,8 +58,9 @@ val decompress(std::string source)
dest_size = source_size*2;

// Initialize the destination size to 1 MiB at minimum
if (dest_size < 1024*1024)
dest_size = 1024*1024;
if (dest_size < DEST_GROWTH_SIZE)
dest_size = DEST_GROWTH_SIZE;

} else if (dest_size == ZSTD_CONTENTSIZE_ERROR) {
ZSTD_freeDStream(zds);
throw std::runtime_error("zstd codec error: content size error");
Expand All @@ -64,6 +70,7 @@ val decompress(std::string source)
throw std::runtime_error("zstd codec error: unknown ZSTD_getFrameContentSize error");
}

// the output buffer will either be assigned to dest_ptr to be freed by free_result, or freed on error
output.dst = malloc((size_t) dest_size);

if (output.dst == NULL) {
Expand All @@ -87,13 +94,10 @@ val decompress(std::string source)
}

if (status > 0 && output.pos == output.size ) {
// attempt to expand output buffer in 1 MiB (or more) increments
if (status < 1024*1024) {
status = (size_t) 1024*1024;
}
size_t new_size = output.size + status;
// attempt to expand output buffer in DEST_GROWTH_SIZE increments
size_t new_size = output.size + DEST_GROWTH_SIZE;

if (new_size < output.size || new_size < status) {
if (new_size < output.size || new_size < DEST_GROWTH_SIZE) {
// overflow error
ZSTD_freeDStream(zds);
free(output.dst);
Expand All @@ -114,15 +118,13 @@ val decompress(std::string source)

output.size = new_size;
}
} while (status > 0);

// status > 0 indicates there are additional bytes to process in this frame
// status == 0 and input.pos < input.size suggests there may be an additional frame
} while (status > 0 || input.pos < input.size);

ZSTD_freeDStream(zds);

if (input.pos < input.size) {
free(output.dst);
throw std::runtime_error("zstd codec error: unprocessed input suggests more than one input frame");
}

dest_ptr = (char *) output.dst;

return val(typed_memory_view(output.pos, (uint8_t *)dest_ptr));
Expand Down
1 change: 1 addition & 0 deletions codecs/zstd/zstd_codec.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ export interface ZstdModule extends EmscriptenModule {
compress(data: BufferSource, level: number): Uint8Array;
decompress(data: BufferSource): Uint8Array;
free_result(): void;
getExceptionMessage(err: WebAssembly.Exception): [string, string];
}

declare const moduleFactory: EmscriptenModuleFactory<ZstdModule>;
Expand Down
Binary file modified codecs/zstd/zstd_codec.wasm
Binary file not shown.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 11 additions & 7 deletions src/zstd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,18 @@ const Zstd: CodecConstructor<ZstdConfig> = class Zstd implements Codec {
emscriptenModule = init();
}
const module = await emscriptenModule;
const view = module.decompress(data);
const result = new Uint8Array(view); // Copy view and free wasm memory
module.free_result();
if (out !== undefined) {
out.set(result);
return out;
try {
const view = module.decompress(data);
const result = new Uint8Array(view); // Copy view and free wasm memory
module.free_result();
if (out !== undefined) {
out.set(result);
return out;
}
return result;
} catch (err) {
throw new Error(module.getExceptionMessage(err).toString());
}
return result;
}
};

Expand Down
31 changes: 19 additions & 12 deletions test/zstd.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,28 @@ test('Static constructor', async t => {
});

test('Streaming decompression', async t => {
// Test input frames with unknown frame content size
const config = { id: 'zstd' };
const codec = Zstd.fromConfig(config);

// We encode bytes directly that were the result of streaming compression
// Encode bytes directly that were the result of streaming compression
const bytes = new Uint8Array([
40, 181, 47, 253, 0, 88, 97, 0, 0, 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33,
]);
const dec = await codec.decode(bytes);
const str = Buffer.from(dec).toString();
t.equal(str, 'Hello World!');

const bytes2 = new Uint8Array([
// Two consecutive frames given as input
const bytes2 = new Uint8Array(bytes.length * 2);
bytes2.set(bytes, 0);
bytes2.set(bytes, bytes.length);
const dec2 = await codec.decode(bytes2);
const str2 = Buffer.from(dec2).toString();
t.equal(str2, 'Hello World!Hello World!');

// Single long frame that decompresses to a large output
const bytes3 = new Uint8Array([
40, 181, 47, 253, 0, 88, 36, 2, 0, 164, 3, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 1, 0, 58, 252, 223, 115, 5, 5, 76, 0, 0, 8,
Expand All @@ -63,21 +73,18 @@ test('Streaming decompression', async t => {
255, 57, 16, 2, 76, 0, 0, 8, 85, 1, 0, 252, 255, 57, 16, 2, 76, 0, 0, 8, 77, 1, 0, 252, 255, 57, 16, 2, 77, 0, 0, 8,
69, 1, 0, 252, 127, 29, 8, 1,
]);
const dec2 = await codec.decode(bytes2);
const str2 = Buffer.from(dec2).toString();
t.equal(str2, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz'.repeat(1024 * 32));
const dec3 = await codec.decode(bytes3);
const str3 = Buffer.from(dec3).toString();
t.equal(str3, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz'.repeat(1024 * 32));

//two frames given as input
const bytes3 = new Uint8Array([
40, 181, 47, 253, 0, 88, 97, 0, 0, 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 40, 181, 47, 253, 0, 88,
97, 0, 0, 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33,
]);
codec.decode(bytes3).then(
// Garbage input results in an error
const bytes4 = new Uint8Array([0, 0, 0, 0, 0, 0, 0, 0]);
codec.decode(bytes4).then(
result => {
t.fail();
},
error => {
t.ok(true);
t.ok(error.message.match('zstd codec error: content size error'));
}
);
});

0 comments on commit b4714df

Please sign in to comment.