Skip to content

Commit f285de5

Browse files
nhz2 and stevengj authored
[CRC32c] Support AbstractVector{UInt8} as input (#56164)
This PR is similar to JuliaIO/CRC32.jl#12. It adds a generic fallback method for `AbstractVector{UInt8}`, similar to the existing generic `IO` method. Co-authored-by: Steven G. Johnson <stevenj@mit.edu>
1 parent 20f933a commit f285de5

File tree

2 files changed

+49
-8
lines changed

2 files changed

+49
-8
lines changed

stdlib/CRC32c/src/CRC32c.jl

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ See [`CRC32c.crc32c`](@ref) for more information.
77
"""
88
module CRC32c
99

10-
import Base.FastContiguousSubArray
1110
import Base: DenseBytes
1211

1312
export crc32c
@@ -16,9 +15,9 @@ export crc32c
1615
crc32c(data, crc::UInt32=0x00000000)
1716
1817
Compute the CRC-32c checksum of the given `data`, which can be
19-
an `Array{UInt8}`, a contiguous subarray thereof, or a `String`. Optionally, you can pass
20-
a starting `crc` integer to be mixed in with the checksum. The `crc` parameter
21-
can be used to compute a checksum on data divided into chunks: performing
18+
an `Array{UInt8}`, a contiguous subarray thereof, an `AbstractVector{UInt8}`, or a `String`.
19+
Optionally, you can pass a starting `crc` integer to be mixed in with the checksum.
20+
The `crc` parameter can be used to compute a checksum on data divided into chunks: performing
2221
`crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`.
2322
(Technically, a little-endian checksum is computed.)
2423
@@ -30,11 +29,26 @@ calling [`take!`](@ref).
3029
3130
For a `String`, note that the result is specific to the UTF-8 encoding
3231
(a different checksum would be obtained from a different Unicode encoding).
33-
To checksum an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`,
32+
To checksum an `a::AbstractArray` of some other bitstype without padding,
33+
you can do `crc32c(vec(reinterpret(UInt8,a)))`,
3434
but note that the result may be endian-dependent.
3535
"""
3636
function crc32c end
3737

38+
function crc32c(a::AbstractVector{UInt8}, crc::UInt32=0x00000000)
    # Generic fallback: stream `a` through a dense scratch buffer, one chunk at
    # a time, and fold each chunk into the running checksum.
    #
    # The chunk size is 24576 = 8192*3, since that is the threshold for the
    # 3-way parallel SIMD code in the underlying jl_crc32c C function.
    chunk = 24576
    stop = lastindex(a)
    remaining = length(a)
    scratch = Memory{UInt8}(undef, Int(min(remaining, chunk)))
    while remaining > 0
        len = min(remaining, chunk)
        # `stop - remaining + 1` is the index of the first byte not yet consumed.
        copyto!(scratch, 1, a, stop - remaining + 1, len)
        crc = Base.unsafe_crc32c(scratch, len % Csize_t, crc)
        remaining -= len
    end
    return crc
end
3852

3953
function crc32c(a::DenseBytes, crc::UInt32=0x00000000)
4054
Base._crc32c(a, crc)
@@ -51,6 +65,5 @@ mixed with a starting `crc` integer. If `nb` is not supplied, then
5165
"""
5266
crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) = Base._crc32c(io, nb, crc)
5367
crc32c(io::IO, crc::UInt32=0x00000000) = Base._crc32c(io, crc)
54-
crc32c(io::IOStream, crc::UInt32=0x00000000) = Base._crc32c(io, crc)
5568

5669
end

stdlib/CRC32c/test/runtests.jl

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,23 @@
33
using Test, Random
44
using CRC32c
55

6+
const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
7+
isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
8+
using .Main.OffsetArrays: Origin
9+
10+
isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl"))
11+
using .Main.FillArrays: Fill
12+
613
function test_crc32c(crc32c)
714
# CRC32c checksum (test data generated from @andrewcooke's CRC.jl package)
815
for (n,crc) in [(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)]
916
s = String(UInt8[1:n;])
1017
ss = SubString(String(UInt8[0:(n+1);]), 2:(n+1))
1118
@test crc32c(UInt8[1:n;]) == crc == crc32c(s) == crc32c(ss)
19+
@test crc == crc32c(UInt8(1):UInt8(n))
20+
m = Memory{UInt8}(undef, n)
21+
m .= 1:n
22+
@test crc == crc32c(m)
1223
end
1324

1425
# test that crc parameter is equivalent to checksum of concatenated data,
@@ -50,9 +61,24 @@ function test_crc32c(crc32c)
5061
LONG = 8192 # from crc32c.c
5162
SHORT = 256 # from crc32c.c
5263
n = LONG*3+SHORT*3+SHORT*2+64+7
53-
big = vcat(reinterpret(UInt8, hton.(0x74d7f887 .^ (1:n÷4))), UInt8[1:n%4;])
64+
bigg = vcat(reinterpret(UInt8, hton.(0x74d7f887 .^ (1:n÷4))), UInt8[1:n%4;])
5465
for (offset,crc) in [(0, 0x13a5ecd5), (1, 0xecf34b7e), (2, 0xfa71b596), (3, 0xbfd24745), (4, 0xf0cb3370), (5, 0xb0ec88b5), (6, 0x258c20a8), (7, 0xa9bd638d)]
55-
@test crc == crc32c(@view big[1+offset:end])
66+
@test crc == crc32c(@view bigg[1+offset:end])
67+
end
68+
69+
# test crc of AbstractVector{UInt8}
70+
@test crc32c(Origin(0)(b"hello")) == crc32c(b"hello")
71+
weird_vectors = [
72+
view(rand(UInt8, 300000), 1:2:300000),
73+
vec(reinterpret(UInt8, collect(Int64(1):Int64(4)))),
74+
vec(reinterpret(UInt8, Int64(1):Int64(4))),
75+
view([0x01, 0x02], UInt(1):UInt(2)),
76+
Fill(0x00, UInt(100)),
77+
Fill(0x00, big(100)),
78+
reinterpret(UInt8, BitVector((true, false, true, false))),
79+
]
80+
for a in weird_vectors
81+
@test crc32c(a) == crc32c(collect(a))
5682
end
5783
end
5884
unsafe_crc32c_sw(a, n, crc) =
@@ -64,6 +90,8 @@ function crc32c_sw(s::Union{String, SubString{String}}, crc::UInt32=0x00000000)
6490
unsafe_crc32c_sw(s, sizeof(s), crc)
6591
end
6692

93+
# Software-CRC method for arbitrary byte vectors: copy `a` into a freshly
# allocated `Vector{UInt8}` and re-dispatch `crc32c_sw` on that copy.
# The starting `crc` must be forwarded; otherwise a caller-supplied value is
# silently ignored and the checksum always starts from zero.
crc32c_sw(a::AbstractVector{UInt8}, crc::UInt32=0x00000000) =
    crc32c_sw(copyto!(Vector{UInt8}(undef, length(a)), a), crc)
6795
function crc32c_sw(io::IO, nb::Integer, crc::UInt32=0x00000000)
6896
nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
6997
buf = Vector{UInt8}(undef, min(nb, 24576))

0 commit comments

Comments
 (0)