From 28192c6678b9baca20a454780f934c1c0868c89e Mon Sep 17 00:00:00 2001 From: Brotli Date: Wed, 21 Feb 2024 18:26:25 -0800 Subject: [PATCH] Prefetch the backreference hashtable bucket. Place the prefetch before the last distance checks, to give the prefetch enough time to work. PiperOrigin-RevId: 609192361 --- c/common/platform.h | 15 +++++++++++++++ c/enc/hash_longest_match64_inc.h | 7 +++++-- c/enc/hash_longest_match_inc.h | 13 ++++++++----- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/c/common/platform.h b/c/common/platform.h index dbba9424..c18ff029 100644 --- a/c/common/platform.h +++ b/c/common/platform.h @@ -519,6 +519,21 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) { #if BROTLI_ENABLE_DUMP BROTLI_UNUSED(&BrotliDump); #endif + +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +#elif BROTLI_GNUC_HAS_BUILTIN(__builtin_prefetch, 3, 1, 0) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +#elif defined(__aarch64__) +# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0) +# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0) +#else +# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */ +# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */ +#endif } #endif /* BROTLI_COMMON_PLATFORM_H_ */ diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h index 441ab86b..7e0b2f57 100644 --- a/c/enc/hash_longest_match64_inc.h +++ b/c/enc/hash_longest_match64_inc.h @@ -170,6 +170,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)( score_t best_score = out->score; size_t best_len = out->len; size_t i; + /* Precalculate the hash key and prefetch the bucket. */ + const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_); + uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_]; + PREFETCH_L1(bucket); + if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16); out->len = 0; out->len_code_delta = 0; @@ -220,8 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)( best_len = 3; } { - const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_); - uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_]; const size_t down = (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u; diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h index 4679dac3..b744f4d3 100644 --- a/c/enc/hash_longest_match_inc.h +++ b/c/enc/hash_longest_match_inc.h @@ -169,11 +169,17 @@ static BROTLI_INLINE void FN(FindLongestMatch)( score_t best_score = out->score; size_t best_len = out->len; size_t i; + /* Precalculate the hash key and prefetch the bucket. */ + const uint32_t key = + FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_); + uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_]; + PREFETCH_L1(bucket); + if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16); + out->len = 0; + out->len_code_delta = 0; BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask); - out->len = 0; - out->len_code_delta = 0; /* Try last distance first. */ for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) { const size_t backward = (size_t)distance_cache[i]; @@ -219,9 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)( best_len = 3; } { - const uint32_t key = - FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_); - uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_]; const size_t down = (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u; for (i = num[key]; i > down;) {