Skip to content

Commit

Permalink
v1.4.3: Merge pull request #1730 from facebook/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
felixhandte authored Aug 19, 2019
2 parents ff304e9 + c9072ee commit a3d655d
Show file tree
Hide file tree
Showing 24 changed files with 300 additions and 177 deletions.
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ matrix:
- make ppcinstall
- make ppcfuzz

- name: Trusty (Versions Compatibility Test)
script:
- make -C tests versionsTest

# check release number
- name: Tag-Specific Test
if: tag =~ ^v[0-9]\.[0-9]
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
v1.4.3
bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709)
bug: Fix Buffer Overflow in v0.3 Decompression by @felixhandte (#1722)
build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705)
misc: Add NULL pointer check in util.c by @leeyoung624 (#1706)

v1.4.2
bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)
bug: Fix seekable decompression in-memory API by @iburinoc (#1695)
Expand Down
20 changes: 11 additions & 9 deletions doc/zstd_manual.html
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>zstd 1.4.2 Manual</title>
<title>zstd 1.4.3 Manual</title>
</head>
<body>
<h1>zstd 1.4.2 Manual</h1>
<h1>zstd 1.4.3 Manual</h1>
<hr>
<a name="Contents"></a><h2>Contents</h2>
<ol>
Expand Down Expand Up @@ -1557,7 +1557,7 @@ <h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>typedef
<a name="Chapter23"></a><h2>Block level API</h2><pre></pre>

<pre><b></b><p> Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
User will have to take in charge required information to regenerate data, such as compressed and content sizes.
But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.

A few rules to respect :
- Compressing and decompressing require a context structure
Expand All @@ -1568,12 +1568,14 @@ <h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>typedef
+ copyCCtx() and copyDCtx() can be used too
- Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+ If input is larger than a block size, it's necessary to split input data into multiple blocks
+ For inputs larger than a single block, really consider using regular ZSTD_compress() instead.
Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
- When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
In which case, nothing is produced into `dst` !
+ User must test for such outcome and deal directly with uncompressed data
+ ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+ For inputs larger than a single block, consider using regular ZSTD_compress() instead.
Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
- When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
===> In which case, nothing is produced into `dst` !
+ User __must__ test for such outcome and deal directly with uncompressed data
+ A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
Doing so would mess up with statistics history, leading to potential data corruption.
+ ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+ In case of multiple successive blocks, should some of them be uncompressed,
decoder must be informed of their existence in order to follow proper history.
Use ZSTD_insertBlock() for such a case.
Expand Down
4 changes: 4 additions & 0 deletions lib/common/bitstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ extern "C" {
=========================================*/
#if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
#elif defined(__ICCARM__)
# include <intrinsics.h>
#endif

#define STREAM_ACCUMULATOR_MIN_32 25
Expand Down Expand Up @@ -163,6 +165,8 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
Expand Down
6 changes: 3 additions & 3 deletions lib/common/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# define INLINE_KEYWORD
#endif

#if defined(__GNUC__)
#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
Expand Down Expand Up @@ -65,7 +65,7 @@
#ifdef _MSC_VER
# define FORCE_NOINLINE static __declspec(noinline)
#else
# ifdef __GNUC__
# if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_NOINLINE static __attribute__((__noinline__))
# else
# define FORCE_NOINLINE static
Expand All @@ -76,7 +76,7 @@
#ifndef __has_attribute
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
#endif
#if defined(__GNUC__)
#if defined(__GNUC__) || defined(__ICCARM__)
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
# define TARGET_ATTRIBUTE(target)
Expand Down
2 changes: 1 addition & 1 deletion lib/common/mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define MEM_FORCE_MEMORY_ACCESS 2
# elif defined(__INTEL_COMPILER) || defined(__GNUC__)
# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
# define MEM_FORCE_MEMORY_ACCESS 1
# endif
#endif
Expand Down
10 changes: 8 additions & 2 deletions lib/common/xxhash.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define XXH_FORCE_MEMORY_ACCESS 2
# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
defined(__ICCARM__)
# define XXH_FORCE_MEMORY_ACCESS 1
# endif
#endif
Expand Down Expand Up @@ -120,7 +121,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
# define INLINE_KEYWORD
#endif

#if defined(__GNUC__)
#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
Expand Down Expand Up @@ -206,7 +207,12 @@ static U64 XXH_read64(const void* memPtr)
# define XXH_rotl32(x,r) _rotl(x,r)
# define XXH_rotl64(x,r) _rotl64(x,r)
#else
#if defined(__ICCARM__)
# include <intrinsics.h>
# define XXH_rotl32(x,r) __ROR(x,(32 - r))
#else
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#endif
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif

Expand Down
8 changes: 5 additions & 3 deletions lib/common/zstd_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ extern "C" {
/**
* Return the specified error if the condition evaluates to true.
*
* In debug modes, prints additional information. In order to do that
* (particularly, printing the conditional that failed), this can't just wrap
* RETURN_ERROR().
* In debug modes, prints additional information.
* In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
Expand Down Expand Up @@ -324,6 +324,8 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
return (unsigned)r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return 31 - __builtin_clz(val);
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
Expand Down
26 changes: 14 additions & 12 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1955,7 +1955,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart;
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
BYTE* seqHead;
BYTE* lastNCount = NULL;

Expand All @@ -1964,7 +1964,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,

/* Compress literals */
{ const BYTE* const literals = seqStorePtr->litStart;
size_t const litSize = seqStorePtr->lit - literals;
size_t const litSize = (size_t)(seqStorePtr->lit - literals);
size_t const cSize = ZSTD_compressLiterals(
&prevEntropy->huf, &nextEntropy->huf,
cctxParams->cParams.strategy,
Expand All @@ -1991,7 +1991,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
if (nbSeq==0) {
/* Copy the old tables over as if we repeated them */
memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
return op - ostart;
return (size_t)(op - ostart);
}

/* seqHead : flags for FSE encoding type */
Expand All @@ -2012,7 +2012,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_defaultAllowed, strategy);
assert(set_basic < set_compressed && set_rle < set_compressed);
assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
{ size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
workspace, wkspSize);
Expand All @@ -2035,7 +2035,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
OF_defaultNorm, OF_defaultNormLog,
defaultPolicy, strategy);
assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
{ size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
workspace, wkspSize);
Expand All @@ -2056,7 +2056,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ML_defaultNorm, ML_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
{ size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
workspace, wkspSize);
Expand All @@ -2070,7 +2070,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));

{ size_t const bitstreamSize = ZSTD_encodeSequences(
op, oend - op,
op, (size_t)(oend - op),
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
Expand All @@ -2097,7 +2097,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
}

DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
return op - ostart;
return (size_t)(op - ostart);
}

MEM_STATIC size_t
Expand Down Expand Up @@ -2270,7 +2270,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
{
size_t cSize;
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate);
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
(unsigned)zc->blockState.matchState.nextToUpdate);

{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
FORWARD_IF_ERROR(bss);
Expand Down Expand Up @@ -2538,8 +2539,9 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)

size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong);
DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
{ size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); }

return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
Expand All @@ -2564,7 +2566,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
if (srcSize <= HASH_READ_SIZE) return 0;

while (iend - ip > HASH_READ_SIZE) {
size_t const remaining = iend - ip;
size_t const remaining = (size_t)(iend - ip);
size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
const BYTE* const ichunk = ip + chunk;

Expand Down
60 changes: 45 additions & 15 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,15 @@ typedef struct {
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
* When loadedDictEnd != 0, a dictionary is in use, and still valid.
* This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
* Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
* When dict referential is copied into active context (i.e. not attached),
* loadedDictEnd == dictSize, since referential starts from zero.
*/
U32 nextToUpdate; /* index from which to continue table update */
U32 hashLog3; /* dispatch table : larger == faster, more memory */
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
Expand Down Expand Up @@ -350,7 +356,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
seqStorePtr->lit += litLength;

/* literal Length */
Expand Down Expand Up @@ -763,24 +769,37 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,

/* Similar to ZSTD_window_enforceMaxDist(),
* but only invalidates dictionary
* when input progresses beyond window size. */
* when input progresses beyond window size.
* assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
* loadedDictEnd uses same referential as window->base
* maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(ZSTD_window_t* window,
ZSTD_checkDictValidity(const ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
assert(loadedDictEndPtr != NULL);
assert(dictMatchStatePtr != NULL);
{ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = *loadedDictEndPtr;
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
assert(blockEndIdx >= loadedDictEnd);

if (blockEndIdx > loadedDictEnd + maxDist) {
/* On reaching window size, dictionaries are invalidated.
* For simplification, if window size is reached anywhere within next block,
* the dictionary is invalidated for the full block.
*/
DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
*loadedDictEndPtr = 0;
*dictMatchStatePtr = NULL;
} else {
if (*loadedDictEndPtr != 0) {
DEBUGLOG(6, "dictionary considered valid for current block");
} } }
}

/**
Expand Down Expand Up @@ -822,6 +841,17 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
return contiguous;
}

MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
{
U32 const maxDistance = 1U << windowLog;
U32 const lowestValid = ms->window.lowLimit;
U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
U32 const isDictionary = (ms->loadedDictEnd != 0);
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
return matchLowest;
}



/* debug functions */
#if (DEBUGLEVEL>=2)
Expand Down
Loading

0 comments on commit a3d655d

Please sign in to comment.