@@ -2164,28 +2164,95 @@ static void dirty_all_reference_blocks(struct vdo_slab *slab)
21642164 dirty_block (& slab -> reference_blocks [i ]);
21652165}
21662166
2167+ static inline bool journal_points_equal (struct journal_point first ,
2168+ struct journal_point second )
2169+ { /* Two journal points are equal only when both sequence_number and entry_count match. */
2170+ return ((first .sequence_number == second .sequence_number ) &&
2171+ (first .entry_count == second .entry_count ));
2172+ }
2173+
21672174/**
2168- * clear_provisional_references() - Clear the provisional reference counts from a reference block.
2169- * @block: The block to clear.
2175+ * match_bytes() - Find the bytes of an 8-byte word that equal a specified value.
2176+ * @input: The 8-byte word whose individual bytes are examined.
2177+ * @match: The byte value sought.
2178+ *
2179+ * Return: A word holding 0x01 in every byte position where the input byte equals @match, and 0x00 in every other byte position.
21702180 */
2171- static void clear_provisional_references ( struct reference_block * block )
2181+ static inline u64 match_bytes ( u64 input , u8 match )
21722182{
2173- vdo_refcount_t * counters = get_reference_counters_for_block (block );
2174- block_count_t j ;
2183+ u64 temp = input ^ (match * 0x0101010101010101ULL ); /* the multiply copies match into all 8 bytes, so a byte of temp is 0 iff that input byte equals match */
2184+ /* Top bit of each byte is set iff the top bit of the temp byte is clear; all other bits are 0. */
2185+ u64 test_top_bits = ~temp & 0x8080808080808080ULL ;
2186+ /* Top bit of each byte is set iff the low 7 bits of the temp byte are clear; the other bits are meaningless. Each byte computes 0x80 minus at most 0x7f, so no borrow crosses a byte boundary. */
2187+ u64 test_low_bits = 0x8080808080808080ULL - (temp & 0x7f7f7f7f7f7f7f7fULL );
2188+ /* Keep the top bit only where both tests say the temp byte is 0, then shift it down to bit 0 so each matching byte reads 0x01. */
2189+ return (test_top_bits & test_low_bits ) >> 7 ;
2190+ }
2191+
2192+ /**
2193+ * count_valid_references() - Process a newly loaded refcount array.
2194+ * @counters: The array of COUNTS_PER_BLOCK counters from a metadata block; modified in place.
2195+ *
2196+ * Scan an 8-byte-aligned array of counters, fixing up any "provisional" values that weren't
2197+ * cleaned up at shutdown by rewriting them in the array as "empty".
2198+ *
2199+ * Return: The number of blocks that are referenced, i.e. counters that are not "empty" once provisional values have been converted to "empty".
2200+ */
2201+ static unsigned int count_valid_references (vdo_refcount_t * counters )
2202+ {
2203+ u64 * words = (u64 * )counters ;
2204+ /* It's easier to count occurrences of a specific byte than its absences. */
2205+ unsigned int empty_count = 0 ;
2206+ /* For speed, we process 8 bytes at once. */
2207+ unsigned int words_left = COUNTS_PER_BLOCK / sizeof (u64 );
2208+
2209+ /*
2210+ * Sanity check the assumptions used for optimizing this code: counters are single bytes, and
2211+ * the counter array is a whole multiple of the word size.
2212+ */
2213+ BUILD_BUG_ON (sizeof (vdo_refcount_t ) != 1 );
2214+ BUILD_BUG_ON ((COUNTS_PER_BLOCK % sizeof (u64 )) != 0 );
21752215
2176- for (j = 0 ; j < COUNTS_PER_BLOCK ; j ++ ) {
2177- if (counters [j ] == PROVISIONAL_REFERENCE_COUNT ) {
2178- counters [j ] = EMPTY_REFERENCE_COUNT ;
2179- block -> allocated_count -- ;
2216+ while (words_left > 0 ) {
2217+ /*
2218+ * This is used effectively as 8 byte-sized counters. Byte 0 counts how many words
2219+ * had the target value in byte 0, and so on. We just have to avoid overflow.
2220+ */
2221+ u64 split_count = 0 ;
2222+ /*
2223+ * The "% 255" trick used below to fold split_count into empty_count only works while
2224+ * the 8 byte counters sum to less than 255, so at most 254 bytes may be examined per
2225+ * iteration of the outer loop. We process a word at a time, so that rounds down to 31 u64 words.
2226+ */
2227+ const unsigned int max_words_per_iteration = 254 / sizeof (u64 );
2228+ unsigned int iter_words_left = min_t (unsigned int , words_left ,
2229+ max_words_per_iteration );
2230+
2231+ words_left -= iter_words_left ;
2232+
2233+ while (iter_words_left -- ) {
2234+ u64 word = * words ;
2235+ u64 temp ;
2236+
2237+ /* First, if this word holds any provisional refcount values, clear them. */
2238+ temp = match_bytes (word , PROVISIONAL_REFERENCE_COUNT );
2239+ if (temp ) {
2240+ /*
2241+ * 'temp' has 0x01 bytes where 'word' has PROVISIONAL; this xor
2242+ * alters just those bytes, changing PROVISIONAL to EMPTY, and the word is written back.
2243+ */
2244+ word ^= temp * (PROVISIONAL_REFERENCE_COUNT ^ EMPTY_REFERENCE_COUNT );
2245+ * words = word ;
2246+ }
2247+
2248+ /* Now count the EMPTY_REFERENCE_COUNT bytes (including any just converted), updating the 8 counters. */
2249+ split_count += match_bytes (word , EMPTY_REFERENCE_COUNT );
2250+ words ++ ;
21802251 }
2252+ empty_count += split_count % 255 ; /* 256 == 1 (mod 255), so this is the sum of the 8 byte counters, which is at most 31 * 8 = 248 */
21812253 }
2182- }
21832254
2184- static inline bool journal_points_equal (struct journal_point first ,
2185- struct journal_point second )
2186- {
2187- return ((first .sequence_number == second .sequence_number ) &&
2188- (first .entry_count == second .entry_count ));
2255+ return COUNTS_PER_BLOCK - empty_count ;
21892256}
21902257
21912258/**
@@ -2196,7 +2263,6 @@ static inline bool journal_points_equal(struct journal_point first,
21962263static void unpack_reference_block (struct packed_reference_block * packed ,
21972264 struct reference_block * block )
21982265{
2199- block_count_t index ;
22002266 sector_count_t i ;
22012267 struct vdo_slab * slab = block -> slab ;
22022268 vdo_refcount_t * counters = get_reference_counters_for_block (block );
@@ -2222,11 +2288,7 @@ static void unpack_reference_block(struct packed_reference_block *packed,
22222288 }
22232289 }
22242290
2225- block -> allocated_count = 0 ;
2226- for (index = 0 ; index < COUNTS_PER_BLOCK ; index ++ ) {
2227- if (counters [index ] != EMPTY_REFERENCE_COUNT )
2228- block -> allocated_count ++ ;
2229- }
2291+ block -> allocated_count = count_valid_references (counters );
22302292}
22312293
22322294/**
@@ -2247,7 +2309,6 @@ static void finish_reference_block_load(struct vdo_completion *completion)
22472309 struct packed_reference_block * packed = (struct packed_reference_block * ) data ;
22482310
22492311 unpack_reference_block (packed , block );
2250- clear_provisional_references (block );
22512312 slab -> free_blocks -= block -> allocated_count ;
22522313 }
22532314 return_vio_to_pool (pooled );
0 commit comments