fix(gzip): Fix decoding of zlib-format data
This was accidentally broken by the change to use `CompressionStream`.

Fixes #679.
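For background: `DecompressionStream` takes a `CompressionFormat` argument — "gzip" for RFC 1952 gzip members, "deflate" for RFC 1950 zlib-wrapped streams, and "deflate-raw" for bare DEFLATE. Hard-coding "gzip" therefore makes zlib-wrapped payloads fail to decode. The snippet below is an illustrative sketch, not code from this commit; `zlibPayload` is a placeholder for any zlib-wrapped chunk, such as one written by N5 gzip compression with useZlib=true.

// Illustration only: decode the same zlib-wrapped payload with both formats.
// "deflate" accepts the RFC 1950 wrapper; "gzip" rejects it and the stream errors.
async function tryDecode(payload: Uint8Array, format: CompressionFormat) {
  const stream = new Response(payload).body!.pipeThrough(
    new DecompressionStream(format),
  );
  try {
    const decoded = await new Response(stream).arrayBuffer();
    console.log(`${format}: decoded ${decoded.byteLength} bytes`);
  } catch (e) {
    console.log(`${format}: failed (${(e as Error).message})`);
  }
}

// await tryDecode(zlibPayload, "deflate"); // succeeds
// await tryDecode(zlibPayload, "gzip");    // fails: wrong wrapper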
jbms committed Dec 10, 2024
1 parent c581f20 commit e9c1ff7
Showing 13 changed files with 75 additions and 45 deletions.
7 changes: 5 additions & 2 deletions python/tests/n5_test.py
@@ -27,6 +27,10 @@
[
{"driver": "n5", "metadata": {"compression": {"type": "raw"}}},
{"driver": "n5", "metadata": {"compression": {"type": "gzip"}}},
+ {
+     "driver": "n5",
+     "metadata": {"compression": {"type": "gzip", "useZlib": True}},
+ },
{
"driver": "n5",
"metadata": {
@@ -38,8 +42,7 @@
}
},
},
- # TODO(jbms): Add once tensorstore supports zstd
- # {"driver": "n5", "metadata": {"compression": {"type": "zstd"}}},
+ {"driver": "n5", "metadata": {"compression": {"type": "zstd"}}},
],
ids=str,
)
1 change: 1 addition & 0 deletions python/tests/zarr_test.py
@@ -26,6 +26,7 @@
"spec",
[
{"driver": "zarr"},
+ {"driver": "zarr", "metadata": {"compressor": {"id": "zlib"}}},
{"driver": "zarr", "schema": {"chunk_layout": {"inner_order": [2, 1, 0]}}},
{"driver": "zarr3"},
{"driver": "zarr3", "schema": {"chunk_layout": {"inner_order": [2, 1, 0]}}},
5 changes: 4 additions & 1 deletion src/datasource/n5/backend.ts
@@ -61,8 +61,11 @@ async function decodeChunk(
chunk.chunkDataSize = shape;
let buffer = new Uint8Array(response, offset);
switch (encoding) {
+ case VolumeChunkEncoding.ZLIB:
+   buffer = new Uint8Array(await decodeGzip(buffer, "deflate"));
+   break;
case VolumeChunkEncoding.GZIP:
- buffer = new Uint8Array(await decodeGzip(buffer));
+ buffer = new Uint8Array(await decodeGzip(buffer, "gzip"));
break;
case VolumeChunkEncoding.BLOSC:
buffer = await requestAsyncComputation(
9 changes: 5 additions & 4 deletions src/datasource/n5/base.ts
@@ -15,10 +15,11 @@
*/

export enum VolumeChunkEncoding {
- RAW = 0,
- GZIP = 1,
- BLOSC = 2,
- ZSTD = 3,
+ RAW,
+ ZLIB,
+ GZIP,
+ BLOSC,
+ ZSTD,
}

export class VolumeChunkSourceParameters {
12 changes: 12 additions & 0 deletions src/datasource/n5/frontend.ts
@@ -68,6 +68,7 @@ import {
expectArray,
parseArray,
parseFixedLengthArray,
+ verifyBoolean,
verifyEnumString,
verifyFinitePositiveFloat,
verifyObject,
@@ -235,6 +236,17 @@ class ScaleMetadata {
encoding = verifyObjectProperty(compression, "type", (x) =>
verifyEnumString(x, VolumeChunkEncoding),
);
+ if (
+   encoding === VolumeChunkEncoding.GZIP &&
+   verifyOptionalObjectProperty(
+     compression,
+     "useZlib",
+     verifyBoolean,
+     false,
+   ) === true
+ ) {
+   encoding = VolumeChunkEncoding.ZLIB;
+ }
});
if (encoding === undefined) {
encoding = verifyObjectProperty(obj, "compressionType", (x) =>
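For reference, the two N5 compression metadata shapes involved here are sketched below (illustrative values only, mirroring the new Python test); with the added check, the second form now resolves to VolumeChunkEncoding.ZLIB and is decoded with the "deflate" format.

// Sketch of N5 attributes.json "compression" blocks (values are illustrative).
const gzipCompression = { type: "gzip" };                // -> VolumeChunkEncoding.GZIP
const zlibCompression = { type: "gzip", useZlib: true }; // -> VolumeChunkEncoding.ZLIB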
2 changes: 1 addition & 1 deletion src/datasource/nifti/backend.ts
@@ -61,7 +61,7 @@ async function decodeNiftiFile(
_cancellationToken: CancellationToken,
) {
if (isCompressed(buffer)) {
- buffer = await decodeGzip(buffer);
+ buffer = await decodeGzip(buffer, "gzip");
}
const data = new NiftiFileData();
data.uncompressedData = buffer;
7 changes: 5 additions & 2 deletions src/datasource/precomputed/backend.ts
@@ -218,7 +218,10 @@ function getMinishardIndexDataSource(
cancellationToken,
);
if (sharding.minishardIndexEncoding === DataEncoding.GZIP) {
- minishardIndexResponse = await decodeGzip(minishardIndexResponse);
+ minishardIndexResponse = await decodeGzip(
+   minishardIndexResponse,
+   "gzip",
+ );
}
if (minishardIndexResponse.byteLength % 24 !== 0) {
throw new Error(
@@ -344,7 +347,7 @@ async function getShardedData(
cancellationToken,
);
if (minishardIndexSource.sharding.dataEncoding === DataEncoding.GZIP) {
- data = await decodeGzip(data);
+ data = await decodeGzip(data, "gzip");
}
return {
data,
31 changes: 18 additions & 13 deletions src/datasource/zarr/codec/gzip/decode.ts
@@ -20,16 +20,21 @@ import { CodecKind } from "#src/datasource/zarr/codec/index.js";
import type { CancellationToken } from "#src/util/cancellation.js";
import { decodeGzip } from "#src/util/gzip.js";

- registerCodec({
-   name: "gzip",
-   kind: CodecKind.bytesToBytes,
-   async decode(
-     configuration: Configuration,
-     encoded: Uint8Array,
-     cancellationToken: CancellationToken,
-   ): Promise<Uint8Array> {
-     configuration;
-     cancellationToken;
-     return new Uint8Array(await decodeGzip(encoded));
-   },
- });
+ for (const [name, compressionFormat] of [
+   ["gzip", "gzip"],
+   ["zlib", "deflate"],
+ ] as const) {
+   registerCodec({
+     name,
+     kind: CodecKind.bytesToBytes,
+     async decode(
+       configuration: Configuration,
+       encoded: Uint8Array,
+       cancellationToken: CancellationToken,
+     ): Promise<Uint8Array> {
+       configuration;
+       cancellationToken;
+       return new Uint8Array(await decodeGzip(encoded, compressionFormat));
+     },
+   });
+ }
20 changes: 11 additions & 9 deletions src/datasource/zarr/codec/gzip/resolve.ts
@@ -26,12 +26,14 @@ export interface Configuration {
level: number;
}

- registerCodec({
-   name: "gzip",
-   kind: CodecKind.bytesToBytes,
-   resolve(configuration: unknown): { configuration: Configuration } {
-     verifyObject(configuration);
-     const level = verifyObjectProperty(configuration, "level", verifyInt);
-     return { configuration: { level } };
-   },
- });
+ for (const name of ["gzip", "zlib"]) {
+   registerCodec({
+     name,
+     kind: CodecKind.bytesToBytes,
+     resolve(configuration: unknown): { configuration: Configuration } {
+       verifyObject(configuration);
+       const level = verifyObjectProperty(configuration, "level", verifyInt);
+       return { configuration: { level } };
+     },
+   });
+ }
9 changes: 1 addition & 8 deletions src/datasource/zarr/metadata/parse.ts
@@ -396,16 +396,9 @@ export function parseV2Metadata(
break;
case "zlib":
case "gzip":
- codecs.push({
-   name: "gzip",
-   configuration: {
-     level: verifyObjectProperty(compressor, "level", verifyInt),
-   },
- });
- break;
case "zstd":
codecs.push({
- name: "zstd",
+ name: id,
configuration: {
level: verifyObjectProperty(compressor, "level", verifyInt),
},
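As a reminder of what parseV2Metadata consumes here: a zarr v2 .zarray file names its compressor by numcodecs id plus a level, so with this change the id is forwarded directly as the codec name instead of being collapsed to "gzip". A hedged sketch with illustrative values:

// Illustrative zarr v2 "compressor" entries and the codec each now resolves to.
const zlibCompressor = { id: "zlib", level: 6 }; // -> codec "zlib" (zlib/deflate wrapper)
const gzipCompressor = { id: "gzip", level: 5 }; // -> codec "gzip" (gzip wrapper)
const zstdCompressor = { id: "zstd", level: 3 }; // -> codec "zstd"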
4 changes: 3 additions & 1 deletion src/sliceview/backend_chunk_decoders/bossNpz.ts
@@ -35,7 +35,9 @@ export async function decodeBossNpzChunk(
cancellationToken: CancellationToken,
response: ArrayBuffer,
) {
- const parseResult = parseNpy(new Uint8Array(await decodeGzip(response)));
+ const parseResult = parseNpy(
+   new Uint8Array(await decodeGzip(response, "deflate")),
+ );
const chunkDataSize = chunk.chunkDataSize!;
const source = chunk.source!;
const { shape } = parseResult;
4 changes: 3 additions & 1 deletion src/sliceview/backend_chunk_decoders/ndstoreNpz.ts
@@ -35,7 +35,9 @@ export async function decodeNdstoreNpzChunk(
cancellationToken: CancellationToken,
response: ArrayBuffer,
) {
- const parseResult = parseNpy(new Uint8Array(await decodeGzip(response)));
+ const parseResult = parseNpy(
+   new Uint8Array(await decodeGzip(response, "deflate")),
+ );
const chunkDataSize = chunk.chunkDataSize!;
const source = chunk.source!;
const { shape } = parseResult;
9 changes: 6 additions & 3 deletions src/util/gzip.ts
@@ -22,9 +22,12 @@ export function isGzipFormat(data: ArrayBufferView) {
return view.length > 2 && view[0] === 0x1f && view[1] === 0x8b;
}

- export async function decodeGzip(data: ArrayBuffer | ArrayBufferView) {
+ export async function decodeGzip(
+   data: ArrayBuffer | ArrayBufferView,
+   format: CompressionFormat,
+ ) {
const decompressedStream = new Response(data).body!.pipeThrough(
-   new DecompressionStream("gzip"),
+   new DecompressionStream(format),
);
return await new Response(decompressedStream).arrayBuffer();
}
@@ -40,7 +43,7 @@ export async function maybeDecompressGzip(data: ArrayBuffer | ArrayBufferView) {
byteView = new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
}
if (isGzipFormat(byteView)) {
- return new Uint8Array(await decodeGzip(byteView));
+ return new Uint8Array(await decodeGzip(byteView, "gzip"));
}
return byteView;
}
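isGzipFormat above relies on the gzip magic bytes (0x1f 0x8b); a zlib (RFC 1950) wrapper has no comparable magic, but it can be sniffed heuristically from its header. A hypothetical companion helper, not part of this commit and only a sketch, might look like:

// Heuristic check for an RFC 1950 zlib wrapper: CM (low nibble of byte 0) must be
// 8 (deflate), and the first two bytes read big-endian must be a multiple of 31.
export function isZlibFormat(data: ArrayBufferView) {
  const view = new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
  return (
    view.length > 2 &&
    (view[0] & 0x0f) === 8 &&
    ((view[0] << 8) + view[1]) % 31 === 0
  );
}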
