Reduce RowHash's tag space size by x2
Allocate half the memory for tag space, which means we get one less slot for an actual tag (it is needed for the next-position index).
In turn, we cut memory usage in exchange for a slightly worse compression ratio, or get a better ratio by using the same amount of memory with a higher hashLog.
yoniko committed Mar 10, 2023
1 parent db7d7b6 commit 69be424
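
To make the layout change easier to picture: after this commit each tag-table row is rowEntries bytes, byte 0 of the row doubles as the "head" counter, and the remaining bytes each hold a 1-byte tag, which is why one slot per row is lost. The snippet below is a minimal standalone sketch of that cycling behaviour, not zstd source; demo_row_nextIndex simply mirrors the logic of the new ZSTD_row_nextIndex() shown in the lib/compress/zstd_lazy.c diff below, and rowEntries = 16 is just one of the supported row sizes.

#include <stdint.h>
#include <stdio.h>

typedef uint8_t BYTE;
typedef uint32_t U32;

/* Mirrors the new ZSTD_row_nextIndex(): byte 0 of the row is the head
 * counter, so valid tag positions cycle backwards through [1, rowEntries). */
static U32 demo_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
    U32 next = (*tagRow - 1) & rowMask;
    next += (next == 0) ? rowMask : 0; /* skip position 0, it holds the head byte */
    *tagRow = (BYTE)next;
    return next;
}

int main(void) {
    enum { rowEntries = 16 };          /* example row size */
    BYTE tagRow[rowEntries] = { 0 };   /* byte 0 = head, bytes 1..15 = 1-byte tags */
    U32 const rowMask = rowEntries - 1;
    int i;
    for (i = 0; i < 20; ++i) {
        U32 const pos = demo_row_nextIndex(tagRow, rowMask);
        printf("%u ", pos);            /* prints 15 14 ... 1 15 14 ..., never 0 */
    }
    printf("\n");
    return 0;
}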
Showing 5 changed files with 320 additions and 321 deletions.
lib/compress/zstd_compress.c (10 changes: 5 additions & 5 deletions)
@@ -1593,7 +1593,7 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
- ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
+ ? ZSTD_cwksp_aligned_alloc_size(hSize)
: 0;
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
@@ -1945,8 +1945,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,

if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
{ /* Row match finder needs an additional table of hashes ("tags") */
- size_t const tagTableSize = hSize*sizeof(U16);
- ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+ size_t const tagTableSize = hSize;
+ ms->tagTable = (BYTE*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
}
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
@@ -2339,7 +2339,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
}
/* copy tag table */
if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
- size_t const tagTableSize = hSize*sizeof(U16);
+ size_t const tagTableSize = hSize;
ZSTD_memcpy(cctx->blockState.matchState.tagTable,
cdict->matchState.tagTable,
tagTableSize);
@@ -4690,7 +4690,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
} else {
assert(params->useRowMatchFinder != ZSTD_ps_auto);
if (params->useRowMatchFinder == ZSTD_ps_enable) {
- size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+ size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);
ZSTD_memset(ms->tagTable, 0, tagTableSize);
ZSTD_row_update(ms, iend-HASH_READ_SIZE);
DEBUGLOG(4, "Using row-based hash table for lazy dict");
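For a concrete sense of the saving implied by the tagTableSize changes above, here is a small hedged calculation; hashLog = 17 is only an example value, not something fixed by the commit:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    unsigned const hashLog = 17;                              /* example value only */
    size_t const hSize = (size_t)1 << hashLog;                /* hash-table entry count */
    size_t const oldTagTableSize = hSize * sizeof(uint16_t);  /* before: 262144 bytes (256 KiB) */
    size_t const newTagTableSize = hSize;                     /* after:  131072 bytes (128 KiB) */
    printf("old tag table: %zu bytes, new tag table: %zu bytes\n",
           oldTagTableSize, newTagTableSize);
    return 0;
}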
lib/compress/zstd_compress_internal.h (2 changes: 1 addition & 1 deletion)
@@ -226,7 +226,7 @@ struct ZSTD_matchState_t {
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */

U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
- U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+ BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */

U32* hashTable;
lib/compress/zstd_lazy.c (31 changes: 15 additions & 16 deletions)
@@ -758,7 +758,6 @@ size_t ZSTD_HcFindBestMatch(
* (SIMD) Row-based matchfinder
***********************************/
/* Constants for row-based hash */
- #define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */

@@ -801,12 +800,13 @@ U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {

/* ZSTD_row_nextIndex():
* Returns the next index to insert at within a tagTable row, and updates the "head"
- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
*/
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
- U32 const next = (*tagRow - 1) & rowMask;
- *tagRow = (BYTE)next;
- return next;
+ U32 next = (*tagRow-1) & rowMask;
+ next += (next == 0) ? rowMask : 0; /* skip first position */
+ *tagRow = (BYTE)next;
+ return next;
}

/* ZSTD_isAligned():
@@ -820,7 +820,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
/* ZSTD_row_prefetch():
* Performs prefetching for the hashTable and tagTable at a given row.
*/
- FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
PREFETCH_L1(hashTable + relRow);
if (rowLog >= 5) {
PREFETCH_L1(hashTable + relRow + 16);
@@ -844,7 +844,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
U32 idx, const BYTE* const iLimit)
{
U32 const* const hashTable = ms->hashTable;
- U16 const* const tagTable = ms->tagTable;
+ BYTE const* const tagTable = ms->tagTable;
U32 const hashLog = ms->rowHashLog;
U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
@@ -866,7 +866,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
* base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
*/
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
- U16 const* tagTable, BYTE const* base,
+ BYTE const* tagTable, BYTE const* base,
U32 idx, U32 const hashLog,
U32 const rowLog, U32 const mls)
{
@@ -888,7 +888,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
U32 const rowMask, U32 const useCache)
{
U32* const hashTable = ms->hashTable;
- U16* const tagTable = ms->tagTable;
+ BYTE* const tagTable = ms->tagTable;
U32 const hashLog = ms->rowHashLog;
const BYTE* const base = ms->window.base;

@@ -898,12 +898,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
: (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
U32* const row = hashTable + relRow;
- BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
- Explicit cast allows us to get exact desired position within each row */
+ BYTE* tagRow = tagTable + relRow;
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);

assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
- ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
row[pos] = updateStartIdx;
}
}
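
As a side note on the indexing seen in this hunk, the hash is computed with hashLog + ZSTD_ROW_HASH_TAG_BITS bits: the high bits pick the row and the low bits become the 1-byte tag written to tagRow[pos]. The sketch below just replays those two expressions on a made-up hash value; ZSTD_ROW_HASH_TAG_BITS = 8 and rowLog = 4 are assumed example values, and the literal hash stands in for a real ZSTD_hashPtr() result.

#include <stdint.h>
#include <stdio.h>

int main(void) {
    unsigned const tagBits = 8;                           /* assumed ZSTD_ROW_HASH_TAG_BITS */
    unsigned const rowLog  = 4;                           /* 16 entries per row, example */
    uint32_t const tagMask = (1u << tagBits) - 1;         /* mirrors ZSTD_ROW_HASH_TAG_MASK */
    uint32_t const hash    = 0x12345678u;                 /* stand-in for ZSTD_hashPtr(...) */
    uint32_t const relRow  = (hash >> tagBits) << rowLog; /* row start, as in the hunk above */
    uint8_t  const tag     = (uint8_t)(hash & tagMask);   /* value stored at tagRow[pos] */
    printf("relRow=%u tag=0x%02x\n", relRow, tag);
    return 0;
}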
@@ -1059,7 +1058,7 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
FORCE_INLINE_TEMPLATE ZSTD_VecMask
ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
{
- const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
+ const BYTE* const src = tagRow;
assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
@@ -1144,7 +1143,7 @@ size_t ZSTD_RowFindBestMatch(
const U32 rowLog)
{
U32* const hashTable = ms->hashTable;
- U16* const tagTable = ms->tagTable;
+ BYTE* const tagTable = ms->tagTable;
U32* const hashCache = ms->hashCache;
const U32 hashLog = ms->rowHashLog;
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -1188,7 +1187,7 @@
if (dictMode == ZSTD_dictMatchState) {
/* Prefetch DMS rows */
U32* const dmsHashTable = dms->hashTable;
- U16* const dmsTagTable = dms->tagTable;
+ BYTE* const dmsTagTable = dms->tagTable;
U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
@@ -1230,7 +1229,7 @@
in ZSTD_row_update_internal() at the next search. */
{
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
- tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+ tagRow[pos] = (BYTE)tag;
row[pos] = ms->nextToUpdate++;
}

tests/fuzzer.c (2 changes: 1 addition & 1 deletion)
@@ -2295,7 +2295,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
3663, 3662, 3661, 3660, 3660,
3660, 3660, 3660 };
size_t const target_wdict_cSize[22+1] = { 2830, 2896, 2893, 2820, 2940,
- 2950, 2950, 2925, 2900, 2891,
+ 2950, 2950, 2925, 2900, 2892,
2910, 2910, 2910, 2780, 2775,
2765, 2760, 2755, 2754, 2753,
2753, 2753, 2753 };