-
Notifications
You must be signed in to change notification settings - Fork 173
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Convert the CRC32 constant generation code to Python
- Loading branch information
Showing
6 changed files
with
162 additions
and
310 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
#!/usr/bin/env python3 | ||
# | ||
# This script generates constants for efficient computation of the gzip CRC-32. | ||
|
||
import sys | ||
|
||
# This is the generator polynomial G(x) of the gzip CRC-32, represented as an | ||
# int using the natural mapping between bits and polynomial coefficients. | ||
G = 0x104c11db7 | ||
|
||
# XOR (add) an iterable of polynomials. | ||
def xor(iterable): | ||
res = 0 | ||
for val in iterable: | ||
res ^= val | ||
return res | ||
|
||
# Multiply two polynomials. | ||
def clmul(a, b): | ||
return xor(a << i for i in range(b.bit_length()) if (b & (1 << i)) != 0) | ||
|
||
# Polynomial division floor(a / b). | ||
def div(a, b): | ||
q = 0 | ||
while a.bit_length() >= b.bit_length(): | ||
q ^= 1 << (a.bit_length() - b.bit_length()) | ||
a ^= b << (a.bit_length() - b.bit_length()) | ||
return q | ||
|
||
# Reduce the polynomial 'a' modulo the polynomial 'b'. | ||
def reduce(a, b): | ||
return a ^ clmul(div(a, b), b) | ||
|
||
# Reverse the bits of a polynomial. | ||
def bitreverse(poly, num_bits): | ||
return xor(1 << (num_bits - 1 - i) for i in range(num_bits) | ||
if (poly & (1 << i)) != 0) | ||
|
||
# Compute x^d mod G. | ||
def x_to_the_d(d): | ||
if d < G.bit_length() - 1: | ||
return 1 << d | ||
t = x_to_the_d(d//2) | ||
t = clmul(t, t) | ||
if d % 2 != 0: | ||
t <<= 1 | ||
return reduce(t, G) | ||
|
||
def gen_tables(): | ||
print('/*') | ||
print(' * crc32_tables.h - data tables for CRC-32 computation') | ||
print(' *') | ||
print(' * THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.') | ||
print(' */') | ||
for n in [1, 8]: | ||
print('') | ||
print(f'static const u32 crc32_slice{n}_table[] MAYBE_UNUSED = {{') | ||
# The i'th table entry is the CRC-32 of the message consisting of byte | ||
# i % 256 followed by i // 256 zero bytes. | ||
polys = [bitreverse(i % 256, 8) << (32 + 8*(i//256)) for i in range(256 * n)] | ||
polys = [bitreverse(reduce(poly, G), 32) for poly in polys] | ||
for i in range(0, len(polys), 4): | ||
print(f'\t0x{polys[i+0]:08x}, 0x{polys[i+1]:08x}, 0x{polys[i+2]:08x}, 0x{polys[i+3]:08x},') | ||
print('};') | ||
|
||
# Compute the constant multipliers needed for "folding" over various distances | ||
# with the gzip CRC-32. Each such multiplier is x^d mod G(x) for some distance | ||
# d, in bits, over which the folding is occurring. | ||
# | ||
# Folding works as follows: let A(x) be a polynomial (possibly reduced partially | ||
# or fully mod G(x)) for part of the message, and let B(x) be a polynomial | ||
# (possibly reduced partially or fully mod G(x)) for a later part of the | ||
# message. The unreduced combined polynomial is A(x)*x^d + B(x), where d is the | ||
# number of bits separating the two parts of the message plus len(B(x)). Since | ||
# mod G(x) can be applied at any point, x^d mod G(x) can be precomputed and used | ||
# instead of x^d unreduced. That allows the combined polynomial to be computed | ||
# relatively easily in a partially-reduced form A(x)*(x^d mod G(x)) + B(x), with | ||
# length max(len(A(x)) + 31, len(B(x))). This does require doing a polynomial | ||
# multiplication (carryless multiplication). | ||
# | ||
# "Folding" in this way can be used for the entire CRC computation except the | ||
# final reduction to 32 bits; this works well when CPU support for carryless | ||
# multiplication is available. It can also be used to combine CRCs of different | ||
# parts of the message that were computed using a different method. | ||
# | ||
# Note that the gzip CRC-32 uses bit-reversed polynomials. I.e., the low order | ||
# bits are really the high order polynomial coefficients. | ||
def gen_multipliers(): | ||
print('/*') | ||
print(' * crc32_multipliers.h - constants for CRC-32 folding') | ||
print(' *') | ||
print(' * THIS FILE WAS GENERATED BY gen-crc32-consts.py. DO NOT EDIT.') | ||
print(' */') | ||
print('') | ||
|
||
# Compute the multipliers needed for CRC-32 folding with carryless | ||
# multiplication instructions that operate on the 64-bit halves of 128-bit | ||
# segments. Using the terminology from earlier, for each 64-bit fold | ||
# len(A(x)) = 64, and len(B(x)) = 95 since a 64-bit polynomial multiplied by | ||
# a 32-bit one produces a 95-bit one. When A(x) is the low order polynomial | ||
# half of a 128-bit segments (high order physical half), the separation | ||
# between the message parts is the total length of the 128-bit segments | ||
# separating the values. When A(x) is the high order polynomial half, the | ||
# separation is 64 bits greater. | ||
for i in range(1, 33): | ||
sep_lo = 128 * (i - 1) | ||
sep_hi = sep_lo + 64 | ||
len_B = 95 | ||
for d in [sep_hi + len_B, # A(x) = high 64 polynomial bits (low 64 physical bits) | ||
sep_lo + len_B # A(x) = low 64 polynomial bits (high 64 physical bits) | ||
]: | ||
poly = bitreverse(x_to_the_d(d), 32) | ||
print(f'#define CRC32_X{d}_MODG 0x{poly:08x} /* x^{d} mod G(x) */') | ||
print('') | ||
|
||
# Compute constants for the final 128 => 32 bit reduction. | ||
poly = bitreverse(div(1 << 95, G), 64) | ||
print(f'#define CRC32_BARRETT_CONSTANT_1 0x{poly:016x}ULL /* floor(x^95 / G(x)) */') | ||
poly = bitreverse(G, 33) | ||
print(f'#define CRC32_BARRETT_CONSTANT_2 0x{poly:016x}ULL /* G(x) */') | ||
|
||
# Compute multipliers for combining the CRCs of separate chunks. | ||
print('') | ||
num_chunks = 4 | ||
table_len = 129 | ||
min_chunk_len = 128 | ||
print(f'#define CRC32_NUM_CHUNKS {num_chunks}') | ||
print(f'#define CRC32_MIN_VARIABLE_CHUNK_LEN {min_chunk_len}UL') | ||
print(f'#define CRC32_MAX_VARIABLE_CHUNK_LEN {(table_len-1) * min_chunk_len}UL') | ||
print('') | ||
print('/* Multipliers for implementations that use a variable chunk length */') | ||
print('static const u32 crc32_mults_for_chunklen[][CRC32_NUM_CHUNKS - 1] MAYBE_UNUSED = {') | ||
print('\t{ 0 /* unused row */ },') | ||
for i in range(1, table_len): | ||
chunk_len = i * min_chunk_len | ||
print(f'\t/* chunk_len={chunk_len} */') | ||
print('\t{ ', end='') | ||
for j in range(num_chunks - 1, 0, -1): | ||
d = (j * 8 * chunk_len) - 33 | ||
poly = bitreverse(x_to_the_d(d), 32) | ||
print(f'0x{poly:08x} /* x^{d} mod G(x) */, ', end='') | ||
print('},') | ||
print('};') | ||
fixed_chunk_len = 32768 | ||
print('') | ||
print('/* Multipliers for implementations that use a large fixed chunk length */') | ||
print(f'#define CRC32_FIXED_CHUNK_LEN {fixed_chunk_len}UL') | ||
for j in range(1, num_chunks): | ||
d = (j * 8 * fixed_chunk_len) - 33 | ||
poly = bitreverse(x_to_the_d(d), 32) | ||
print(f'#define CRC32_FIXED_CHUNK_MULT_{j} 0x{poly:08x} /* x^{d} mod G(x) */') | ||
|
||
with open('lib/crc32_tables.h', 'w') as f: | ||
sys.stdout = f | ||
gen_tables() | ||
with open('lib/crc32_multipliers.h', 'w') as f: | ||
sys.stdout = f | ||
gen_multipliers() |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.
bd57e93
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is a rationale behind that?
Seems to me like adding another dependency, when 'old' code in already used language was seemingly fine.
bd57e93
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's no new dependency. The generated constants are checked into the repo, and always have been. Moreover, other data tables in libdeflate were already generated by Python scripts (see scripts/*.py). Unless you are developing libdeflate and want to change the corresponding data tables, there is no need to run any of these scripts.
The new Python script is half the length of the C programs it replaces (taking advantage of things like Python integers being arbitrary length) and is more convenient for me.