-
-
Notifications
You must be signed in to change notification settings - Fork 158
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refuse to read future format versions #217
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,10 +90,8 @@ struct blosc_context { | |
|
||
const uint8_t* src; | ||
uint8_t* dest; /* The current pos in the destination buffer */ | ||
uint8_t* header_flags; /* Flags for header. Currently booked: | ||
- 0: byte-shuffled? | ||
- 1: memcpy'ed? | ||
- 2: bit-shuffled? */ | ||
uint8_t* header_flags; /* Flags for header */ | ||
int compressor_version; /* Compressor version byte, only used during decompression */ | ||
int32_t sourcesize; /* Number of bytes in source buffer (or uncompressed bytes in compressed file) */ | ||
int32_t nblocks; /* Number of total blocks in buffer */ | ||
int32_t leftover; /* Extra bytes at end of buffer */ | ||
|
@@ -691,6 +689,7 @@ static int blosc_d(struct blosc_context* context, int32_t blocksize, | |
int doshuffle = (header_flags & BLOSC_DOSHUFFLE) && (typesize > 1); | ||
int dobitshuffle = ((header_flags & BLOSC_DOBITSHUFFLE) && | ||
(blocksize >= typesize)); | ||
int compressor_version = context->compressor_version; | ||
|
||
if (doshuffle || dobitshuffle) { | ||
_tmp = tmp; | ||
|
@@ -719,28 +718,38 @@ static int blosc_d(struct blosc_context* context, int32_t blocksize, | |
} | ||
else { | ||
if (compformat == BLOSC_BLOSCLZ_FORMAT) { | ||
if (compressor_version != BLOSC_BLOSCLZ_VERSION_FORMAT) | ||
return -9; | ||
nbytes = blosclz_decompress(src, cbytes, _tmp, neblock); | ||
} | ||
#if defined(HAVE_LZ4) | ||
else if (compformat == BLOSC_LZ4_FORMAT) { | ||
if (compressor_version != BLOSC_LZ4_VERSION_FORMAT) | ||
return -9; | ||
nbytes = lz4_wrap_decompress((char *)src, (size_t)cbytes, | ||
(char*)_tmp, (size_t)neblock); | ||
} | ||
#endif /* HAVE_LZ4 */ | ||
#if defined(HAVE_SNAPPY) | ||
else if (compformat == BLOSC_SNAPPY_FORMAT) { | ||
if (compressor_version != BLOSC_SNAPPY_VERSION_FORMAT) | ||
return -1; | ||
nbytes = snappy_wrap_decompress((char *)src, (size_t)cbytes, | ||
(char*)_tmp, (size_t)neblock); | ||
} | ||
#endif /* HAVE_SNAPPY */ | ||
#if defined(HAVE_ZLIB) | ||
else if (compformat == BLOSC_ZLIB_FORMAT) { | ||
if (compressor_version != BLOSC_ZLIB_VERSION_FORMAT) | ||
return -1; | ||
nbytes = zlib_wrap_decompress((char *)src, (size_t)cbytes, | ||
(char*)_tmp, (size_t)neblock); | ||
} | ||
#endif /* HAVE_ZLIB */ | ||
#if defined(HAVE_ZSTD) | ||
else if (compformat == BLOSC_ZSTD_FORMAT) { | ||
if (compressor_version != BLOSC_ZSTD_VERSION_FORMAT) | ||
return -1; | ||
nbytes = zstd_wrap_decompress((char*)src, (size_t)cbytes, | ||
(char*)_tmp, (size_t)neblock); | ||
} | ||
|
@@ -1355,18 +1364,21 @@ int blosc_run_decompression_with_context(struct blosc_context* context, | |
|
||
/* Read the header block */ | ||
version = context->src[0]; /* blosc format version */ | ||
versionlz = context->src[1]; /* blosclz format version */ | ||
context->compressor_version = context->src[1]; | ||
|
||
context->header_flags = (uint8_t*)(context->src + 2); /* flags */ | ||
context->typesize = (int32_t)context->src[3]; /* typesize */ | ||
context->sourcesize = sw32_(context->src + 4); /* buffer size */ | ||
context->blocksize = sw32_(context->src + 8); /* block size */ | ||
ctbytes = sw32_(context->src + 12); /* compressed buffer size */ | ||
|
||
/* Unused values */ | ||
version += 0; /* shut up compiler warning */ | ||
versionlz += 0; /* shut up compiler warning */ | ||
ctbytes += 0; /* shut up compiler warning */ | ||
if (version != BLOSC_VERSION_FORMAT) { | ||
/* Version from future */ | ||
return -1; | ||
} | ||
if (*context->header_flags & 0x08) { | ||
/* compressor flags from the future */ | ||
return -1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
} | ||
|
||
context->bstarts = (uint8_t*)(context->src + 16); | ||
/* Compute some params */ | ||
|
@@ -1455,7 +1467,7 @@ int blosc_decompress(const void *src, void *dest, size_t destsize) | |
int blosc_getitem(const void *src, int start, int nitems, void *dest) | ||
{ | ||
uint8_t *_src=NULL; /* current pos for source buffer */ | ||
uint8_t version, versionlz; /* versions for compressed header */ | ||
uint8_t version, compressor_version; /* versions for compressed header */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For being consistent with blosc convention for names(e.g. |
||
uint8_t flags; /* flags for header */ | ||
int32_t ntbytes = 0; /* the number of uncompressed bytes */ | ||
int32_t nblocks; /* number of total blocks in buffer */ | ||
|
@@ -1474,22 +1486,21 @@ int blosc_getitem(const void *src, int start, int nitems, void *dest) | |
|
||
/* Read the header block */ | ||
version = _src[0]; /* blosc format version */ | ||
versionlz = _src[1]; /* blosclz format version */ | ||
compressor_version = _src[1]; | ||
flags = _src[2]; /* flags */ | ||
typesize = (int32_t)_src[3]; /* typesize */ | ||
nbytes = sw32_(_src + 4); /* buffer size */ | ||
blocksize = sw32_(_src + 8); /* block size */ | ||
ctbytes = sw32_(_src + 12); /* compressed buffer size */ | ||
|
||
if (version != BLOSC_VERSION_FORMAT) | ||
return -9; | ||
|
||
ebsize = blocksize + typesize * (int32_t)sizeof(int32_t); | ||
tmp = my_malloc(blocksize + ebsize + blocksize); | ||
tmp2 = tmp + blocksize; | ||
tmp3 = tmp + blocksize + ebsize; | ||
|
||
version += 0; /* shut up compiler warning */ | ||
versionlz += 0; /* shut up compiler warning */ | ||
ctbytes += 0; /* shut up compiler warning */ | ||
|
||
_src += 16; | ||
bstarts = _src; | ||
/* Compute some params */ | ||
|
@@ -1540,10 +1551,11 @@ int blosc_getitem(const void *src, int start, int nitems, void *dest) | |
cbytes = bsize2; | ||
} | ||
else { | ||
struct blosc_context context; | ||
/* blosc_d only uses typesize and flags */ | ||
struct blosc_context context = {0}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 Nice way to initialize struct to 0. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! It makes the code slightly slower, but so much easier to debug! |
||
/* Only initialize the fields blosc_d uses */ | ||
context.typesize = typesize; | ||
context.header_flags = &flags; | ||
context.compressor_version = compressor_version; | ||
|
||
/* Regular decompression. Put results in tmp2. */ | ||
cbytes = blosc_d(&context, bsize, leftoverblock, | ||
|
@@ -1969,14 +1981,12 @@ void blosc_cbuffer_sizes(const void *cbuffer, size_t *nbytes, | |
size_t *cbytes, size_t *blocksize) | ||
{ | ||
uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ | ||
uint8_t version, versionlz; /* versions for compressed header */ | ||
|
||
/* Read the version info (could be useful in the future) */ | ||
version = _src[0]; /* blosc format version */ | ||
versionlz = _src[1]; /* blosclz format version */ | ||
uint8_t version = _src[0]; /* version of header */ | ||
|
||
version += 0; /* shut up compiler warning */ | ||
versionlz += 0; /* shut up compiler warning */ | ||
if (version != BLOSC_VERSION_FORMAT) { | ||
*nbytes = *blocksize = *cbytes = 0; | ||
return; | ||
} | ||
|
||
/* Read the interesting values */ | ||
*nbytes = (size_t)sw32_(_src + 4); /* uncompressed buffer size */ | ||
|
@@ -1990,17 +2000,16 @@ void blosc_cbuffer_metainfo(const void *cbuffer, size_t *typesize, | |
int *flags) | ||
{ | ||
uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ | ||
uint8_t version, versionlz; /* versions for compressed header */ | ||
|
||
/* Read the version info (could be useful in the future) */ | ||
version = _src[0]; /* blosc format version */ | ||
versionlz = _src[1]; /* blosclz format version */ | ||
uint8_t version = _src[0]; /* version of header */ | ||
|
||
version += 0; /* shut up compiler warning */ | ||
versionlz += 0; /* shut up compiler warning */ | ||
if (version != BLOSC_VERSION_FORMAT) { | ||
*flags = *typesize = 0; | ||
return; | ||
} | ||
|
||
/* Read the interesting values */ | ||
*flags = (int)_src[2]; /* flags */ | ||
*flags = (int)_src[2] & 7; /* first three flags */ | ||
*typesize = (size_t)_src[3]; /* typesize */ | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might it be worth printing an informative message before giving up? Something like:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't like libraries that print messages; that decision should be left to the caller. Until we have a way to return error messages, I prefer silent errors. Maybe we can return -10 and document what it means in the API documentation?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. However, the current approach is to print an informative message before returning a negative number. I don't think that changing this behavior in this PR in just a couple of places is a good idea. Probably the right thing to do is to find all the places where a negative code is returned, make sure that there are no repetitions, and document these codes. One could even think about creating a new API (`blosc_print_error(int)`?) for more verbose output. But this would be another PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, we need at least to move the error printing out of the helper threads to avoid printing garbled output. I can do that refactor in a separate pull request with the existing error messages, and push that first, if you want.
[EDIT: I think printf has a mutex, so the output won't be garbled, but there will be duplicates]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, maybe a printf is not strictly necessary. Perhaps a
char* blosc_get_error(int)
would be enough. A better approach would be to define an enumerated type for the different errors and some functions for returning proper messages. The Zstandard approach could be a good example to follow.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That sounds like a good idea to me, the Zstd approach resembles what I've seen in other libraries. I also don't think a library should print things unless told to - what to do with the error should be the user's decision (where the user here is a programmer).