Skip to content

Commit

Permalink
Convert the CRC32 constant generation code to Python
Browse files Browse the repository at this point in the history
  • Loading branch information
ebiggers committed Nov 28, 2024
1 parent 1a6f436 commit bd57e93
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 310 deletions.
2 changes: 1 addition & 1 deletion lib/crc32_multipliers.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* crc32_multipliers.h - constants for CRC-32 folding
*
* THIS FILE WAS GENERATED BY gen_crc32_multipliers.c. DO NOT EDIT.
* THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.
*/

#define CRC32_X159_MODG 0xae689191 /* x^159 mod G(x) */
Expand Down
2 changes: 1 addition & 1 deletion lib/crc32_tables.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* crc32_tables.h - data tables for CRC-32 computation
*
* THIS FILE WAS GENERATED BY gen_crc32_tables.c. DO NOT EDIT.
* THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.
*/

static const u32 crc32_slice1_table[] MAYBE_UNUSED = {
Expand Down
4 changes: 2 additions & 2 deletions lib/x86/crc32_pclmul_template.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
* instructions. Note that the x86 crc32 instruction cannot be used, as it is
* for a different polynomial, not the gzip one. For an explanation of CRC
* folding with carryless multiplication instructions, see
* scripts/gen_crc32_multipliers.c and the following blog posts and papers:
* scripts/gen-crc32-consts.py and the following blog posts and papers:
*
* "An alternative exposition of crc32_4k_pclmulqdq"
* https://www.corsix.org/content/alternative-exposition-crc32_4k_pclmulqdq
Expand Down Expand Up @@ -189,7 +189,7 @@ ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len)
* folding across 128 bits. mults_128b differs from mults_1v when
* VL != 16. All multipliers are 64-bit, to match what pclmulqdq needs,
* but since this is for CRC-32 only their low 32 bits are nonzero.
* For more details, see scripts/gen_crc32_multipliers.c.
* For more details, see scripts/gen-crc32-consts.py.
*/
const vec_t mults_8v = MULTS_8V;
const vec_t mults_4v = MULTS_4V;
Expand Down
158 changes: 158 additions & 0 deletions scripts/gen-crc32-consts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#!/usr/bin/env python3
#
# This script generates constants for efficient computation of the gzip CRC-32.

import sys

# This is the generator polynomial G(x) of the gzip CRC-32, represented as an
# int using the natural mapping between bits and polynomial coefficients.
G = 0x104c11db7

# XOR (add) an iterable of polynomials.
def xor(iterable):
res = 0
for val in iterable:
res ^= val
return res

# Multiply two polynomials.
def clmul(a, b):
return xor(a << i for i in range(b.bit_length()) if (b & (1 << i)) != 0)

# Polynomial division floor(a / b).
def div(a, b):
q = 0
while a.bit_length() >= b.bit_length():
q ^= 1 << (a.bit_length() - b.bit_length())
a ^= b << (a.bit_length() - b.bit_length())
return q

# Reduce the polynomial 'a' modulo the polynomial 'b'.
def reduce(a, b):
return a ^ clmul(div(a, b), b)

# Reverse the bits of a polynomial.
def bitreverse(poly, num_bits):
return xor(1 << (num_bits - 1 - i) for i in range(num_bits)
if (poly & (1 << i)) != 0)

# Compute x^d mod G.
def x_to_the_d(d):
if d < G.bit_length() - 1:
return 1 << d
t = x_to_the_d(d//2)
t = clmul(t, t)
if d % 2 != 0:
t <<= 1
return reduce(t, G)

def gen_tables():
print('/*')
print(' * crc32_tables.h - data tables for CRC-32 computation')
print(' *')
print(' * THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.')
print(' */')
for n in [1, 8]:
print('')
print(f'static const u32 crc32_slice{n}_table[] MAYBE_UNUSED = {{')
# The i'th table entry is the CRC-32 of the message consisting of byte
# i % 256 followed by i // 256 zero bytes.
polys = [bitreverse(i % 256, 8) << (32 + 8*(i//256)) for i in range(256 * n)]
polys = [bitreverse(reduce(poly, G), 32) for poly in polys]
for i in range(0, len(polys), 4):
print(f'\t0x{polys[i+0]:08x}, 0x{polys[i+1]:08x}, 0x{polys[i+2]:08x}, 0x{polys[i+3]:08x},')
print('};')

# Compute the constant multipliers needed for "folding" over various distances
# with the gzip CRC-32. Each such multiplier is x^d mod G(x) for some distance
# d, in bits, over which the folding is occurring.
#
# Folding works as follows: let A(x) be a polynomial (possibly reduced partially
# or fully mod G(x)) for part of the message, and let B(x) be a polynomial
# (possibly reduced partially or fully mod G(x)) for a later part of the
# message. The unreduced combined polynomial is A(x)*x^d + B(x), where d is the
# number of bits separating the two parts of the message plus len(B(x)). Since
# mod G(x) can be applied at any point, x^d mod G(x) can be precomputed and used
# instead of x^d unreduced. That allows the combined polynomial to be computed
# relatively easily in a partially-reduced form A(x)*(x^d mod G(x)) + B(x), with
# length max(len(A(x)) + 31, len(B(x))). This does require doing a polynomial
# multiplication (carryless multiplication).
#
# "Folding" in this way can be used for the entire CRC computation except the
# final reduction to 32 bits; this works well when CPU support for carryless
# multiplication is available. It can also be used to combine CRCs of different
# parts of the message that were computed using a different method.
#
# Note that the gzip CRC-32 uses bit-reversed polynomials. I.e., the low order
# bits are really the high order polynomial coefficients.
def gen_multipliers():
print('/*')
print(' * crc32_multipliers.h - constants for CRC-32 folding')
print(' *')
print(' * THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.')
print(' */')
print('')

# Compute the multipliers needed for CRC-32 folding with carryless
# multiplication instructions that operate on the 64-bit halves of 128-bit
# segments. Using the terminology from earlier, for each 64-bit fold
# len(A(x)) = 64, and len(B(x)) = 95 since a 64-bit polynomial multiplied by
# a 32-bit one produces a 95-bit one. When A(x) is the low order polynomial
# half of a 128-bit segments (high order physical half), the separation
# between the message parts is the total length of the 128-bit segments
# separating the values. When A(x) is the high order polynomial half, the
# separation is 64 bits greater.
for i in range(1, 33):
sep_lo = 128 * (i - 1)
sep_hi = sep_lo + 64
len_B = 95
for d in [sep_hi + len_B, # A(x) = high 64 polynomial bits (low 64 physical bits)
sep_lo + len_B # A(x) = low 64 polynomial bits (high 64 physical bits)
]:
poly = bitreverse(x_to_the_d(d), 32)
print(f'#define CRC32_X{d}_MODG 0x{poly:08x} /* x^{d} mod G(x) */')
print('')

# Compute constants for the final 128 => 32 bit reduction.
poly = bitreverse(div(1 << 95, G), 64)
print(f'#define CRC32_BARRETT_CONSTANT_1 0x{poly:016x}ULL /* floor(x^95 / G(x)) */')
poly = bitreverse(G, 33)
print(f'#define CRC32_BARRETT_CONSTANT_2 0x{poly:016x}ULL /* G(x) */')

# Compute multipliers for combining the CRCs of separate chunks.
print('')
num_chunks = 4
table_len = 129
min_chunk_len = 128
print(f'#define CRC32_NUM_CHUNKS {num_chunks}')
print(f'#define CRC32_MIN_VARIABLE_CHUNK_LEN {min_chunk_len}UL')
print(f'#define CRC32_MAX_VARIABLE_CHUNK_LEN {(table_len-1) * min_chunk_len}UL')
print('')
print('/* Multipliers for implementations that use a variable chunk length */')
print('static const u32 crc32_mults_for_chunklen[][CRC32_NUM_CHUNKS - 1] MAYBE_UNUSED = {')
print('\t{ 0 /* unused row */ },')
for i in range(1, table_len):
chunk_len = i * min_chunk_len
print(f'\t/* chunk_len={chunk_len} */')
print('\t{ ', end='')
for j in range(num_chunks - 1, 0, -1):
d = (j * 8 * chunk_len) - 33
poly = bitreverse(x_to_the_d(d), 32)
print(f'0x{poly:08x} /* x^{d} mod G(x) */, ', end='')
print('},')
print('};')
fixed_chunk_len = 32768
print('')
print('/* Multipliers for implementations that use a large fixed chunk length */')
print(f'#define CRC32_FIXED_CHUNK_LEN {fixed_chunk_len}UL')
for j in range(1, num_chunks):
d = (j * 8 * fixed_chunk_len) - 33
poly = bitreverse(x_to_the_d(d), 32)
print(f'#define CRC32_FIXED_CHUNK_MULT_{j} 0x{poly:08x} /* x^{d} mod G(x) */')

with open('lib/crc32_tables.h', 'w') as f:
sys.stdout = f
gen_tables()
with open('lib/crc32_multipliers.h', 'w') as f:
sys.stdout = f
gen_multipliers()
201 changes: 0 additions & 201 deletions scripts/gen_crc32_multipliers.c

This file was deleted.

Loading

2 comments on commit bd57e93

@cieplutki
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is a rationale behind that?
Seems to me like adding another dependency, when 'old' code in already used language was seemingly fine.

@ebiggers
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no new dependency. The generated constants are checked into the repo, and always have been. Moreover, other data tables in libdeflate were already generated by Python scripts (see scripts/*.py). Unless you are developing libdeflate and want to change the corresponding data tables, there is no need to run any of these scripts.

The new Python script is half the length of the C programs it replaces (taking advantage of things like Python integers being arbitrary length) and is more convenient for me.

Please sign in to comment.