Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

incorporate upstream fixes to crc32c.c #52326

Merged
merged 10 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 23 additions & 16 deletions src/crc32c.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* crc32c.c -- compute CRC-32C using software table or available hardware instructions
* Copyright (C) 2013 Mark Adler
* Version 1.1 1 Aug 2013 Mark Adler
* Copyright (C) 2013, 2021 Mark Adler
* Version 1.1 1 Aug 2013 Mark Adler, updates from Version 1.2 5 June 2021
*
* Code retrieved in August 2016 from August 2013 post by Mark Adler on
* http://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software
Expand All @@ -10,6 +10,7 @@
* - architecture and compiler detection
* - precompute crc32c tables and store in a generated .c file
* - ARMv8 support
* Updated to incorporate the upstream 2021 patch by Mark Adler that corrects the register constraints.
*/

/*
Expand Down Expand Up @@ -39,6 +40,8 @@
/* Version history:
1.0 10 Feb 2013 First version
1.1 1 Aug 2013 Correct comments on why three crc instructions in parallel
1.2 5 Jun 2021 Correct register constraints on assembly instructions
(+ other changes that were superfluous for us)
*/

#include "julia.h"
Expand Down Expand Up @@ -98,16 +101,14 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len)
to an eight-byte boundary */
while (len && ((uintptr_t)buf & 7) != 0) {
__asm__("crc32b\t" "(%1), %0"
: "=r"(crc0)
: "r"(buf), "0"(crc0));
: "+r"(crc0)
: "r"(buf), "m"(*buf));
buf++;
len--;
}

/* compute the crc on sets of LONG*3 bytes, executing three independent crc
instructions, each on LONG bytes -- this is optimized for the Nehalem,
Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a
throughput of one crc per cycle, but a latency of three cycles */
/* compute the crc on sets of LONG*3 bytes,
making use of three ALUs in parallel on a single core. */
while (len >= LONG * 3) {
uintptr_t crc1 = 0;
uintptr_t crc2 = 0;
Expand All @@ -116,8 +117,11 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len)
__asm__(CRC32_PTR "\t" "(%3), %0\n\t"
CRC32_PTR "\t" LONGx1 "(%3), %1\n\t"
CRC32_PTR "\t" LONGx2 "(%3), %2"
: "=r"(crc0), "=r"(crc1), "=r"(crc2)
: "r"(buf), "0"(crc0), "1"(crc1), "2"(crc2));
: "+r"(crc0), "+r"(crc1), "+r"(crc2)
: "r"(buf),
"m"(* (const char (*)[sizeof(void*)]) &buf[0]),
"m"(* (const char (*)[sizeof(void*)]) &buf[LONG]),
"m"(* (const char (*)[sizeof(void*)]) &buf[LONG*2]));
buf += sizeof(void*);
} while (buf < end);
crc0 = crc32c_shift(crc32c_long, crc0) ^ crc1;
Expand All @@ -136,8 +140,11 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len)
__asm__(CRC32_PTR "\t" "(%3), %0\n\t"
CRC32_PTR "\t" SHORTx1 "(%3), %1\n\t"
CRC32_PTR "\t" SHORTx2 "(%3), %2"
: "=r"(crc0), "=r"(crc1), "=r"(crc2)
: "r"(buf), "0"(crc0), "1"(crc1), "2"(crc2));
: "+r"(crc0), "+r"(crc1), "+r"(crc2)
: "r"(buf),
"m"(* (const char (*)[sizeof(void*)]) &buf[0]),
"m"(* (const char (*)[sizeof(void*)]) &buf[SHORT]),
"m"(* (const char (*)[sizeof(void*)]) &buf[SHORT*2]));
buf += sizeof(void*);
} while (buf < end);
crc0 = crc32c_shift(crc32c_short, crc0) ^ crc1;
Expand All @@ -151,17 +158,17 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len)
const char *end = buf + (len - (len & 7));
while (buf < end) {
__asm__(CRC32_PTR "\t" "(%1), %0"
: "=r"(crc0)
: "r"(buf), "0"(crc0));
: "+r"(crc0)
: "r"(buf), "m"(* (const char (*)[sizeof(void*)]) buf));
buf += sizeof(void*);
}
len &= 7;

/* compute the crc for up to seven trailing bytes */
while (len) {
__asm__("crc32b\t" "(%1), %0"
: "=r"(crc0)
: "r"(buf), "0"(crc0));
: "+r"(crc0)
: "r"(buf), "m"(*buf));
buf++;
len--;
}
Expand Down
9 changes: 9 additions & 0 deletions stdlib/CRC32c/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ function test_crc32c(crc32c)
rm(f, force=true)
end
end

# test longer arrays to cover all the code paths in crc32c.c
LONG = 8192 # from crc32c.c
SHORT = 256 # from crc32c.c
n = LONG*3+SHORT*3+SHORT*2+64+7
big = vcat(reinterpret(UInt8, hton.(0x74d7f887 .^ (1:n÷4))), UInt8[1:n%4;])
for (offset,crc) in [(0, 0x13a5ecd5), (1, 0xecf34b7e), (2, 0xfa71b596), (3, 0xbfd24745), (4, 0xf0cb3370), (5, 0xb0ec88b5), (6, 0x258c20a8), (7, 0xa9bd638d)]
@test crc == crc32c(@view big[1+offset:end])
end
end
# Invoke the C symbol `jl_crc32c_sw` directly via `ccall`.
# The arguments are forwarded in the order (crc, a, n), declared to C as
# (UInt32, Ptr{UInt8}, Csize_t); the UInt32 result is returned unchanged.
function unsafe_crc32c_sw(a, n, crc)
    return ccall(:jl_crc32c_sw, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n)
end
Expand Down