Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix and enforce reinterpret/unsafe_wrap alignment #21831

Merged
merged 2 commits into from
May 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions base/bitarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -283,11 +283,12 @@ function copy_to_bitarray_chunks!(Bc::Vector{UInt64}, pos_d::Int, C::Array{Bool}
nc8 = (nc >>> 3) << 3
if nc8 > 0
ind8 = 1
C8 = reinterpret(UInt64, unsafe_wrap(Array, pointer(C, ind), nc8 << 6))
P8 = Ptr{UInt64}(pointer(C, ind)) # unaligned i64 pointer
@inbounds for i = 1:nc8
c = UInt64(0)
for j = 0:7
c |= (pack8bools(C8[ind8]) << (j<<3))
# unaligned load
c |= (pack8bools(unsafe_load(P8, ind8)) << (j<<3))
ind8 += 1
end
Bc[bind] = c
Expand Down
10 changes: 7 additions & 3 deletions base/dSFMT.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,13 @@ end

# dSFMT jump
function dsfmt_jump(s::DSFMT_state, jp::AbstractString)
index = s.val[end-1]
work = zeros(UInt64, JN32>>1)
dsfmt = reinterpret(UInt64, copy(s.val))
val = s.val
nval = length(val)
index = val[nval - 1]
work = zeros(UInt64, JN32 >> 1)
dsfmt = Vector{UInt64}(nval >> 1)
ccall(:memcpy, Ptr{Void}, (Ptr{UInt64}, Ptr{Int32}, Csize_t),
dsfmt, val, (nval - 1) * sizeof(Int32))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the last location of dsfmt::Vector{UInt64} below is overwritten, isn't it enough to copy (nval-2)*sizeof(Int32) in the memcpy here? Of course nothing wrong either.

dsfmt[end] = UInt64(N*2)

for c in jp
Expand Down
8 changes: 8 additions & 0 deletions base/docs/helpdb/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,14 @@ For example,
`reinterpret(Float32, UInt32(7))` interprets the 4 bytes corresponding to `UInt32(7)` as a
`Float32`.

!!! warning

It is not allowed to `reinterpret` an array to an element type with a larger alignment then
the alignment of the array. For a normal `Array`, this is the alignment of its element type.
For a reinterpreted array, this is the alignment of the `Array` it was reinterpreted from.
For example, `reinterpret(UInt32, UInt8[0, 0, 0, 0])` is not allowed but
`reinterpret(UInt32, reinterpret(UInt8, Float32[1.0]))` is allowed.

```jldoctest
julia> reinterpret(Float32, UInt32(7))
1.0f-44
Expand Down
36 changes: 20 additions & 16 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,29 +215,33 @@ readlines(s=STDIN; chomp::Bool=true) = collect(eachline(s, chomp=chomp))

## byte-order mark, ntoh & hton ##

let endian_boms = reinterpret(UInt8, UInt32[0x01020304])
global ntoh, hton, ltoh, htol
if endian_boms == UInt8[1:4;]
ntoh(x) = x
hton(x) = x
ltoh(x) = bswap(x)
htol(x) = bswap(x)
const global ENDIAN_BOM = 0x01020304
elseif endian_boms == UInt8[4:-1:1;]
ntoh(x) = bswap(x)
hton(x) = bswap(x)
ltoh(x) = x
htol(x) = x
const global ENDIAN_BOM = 0x04030201
else
error("seriously? what is this machine?")
end
end

"""
ENDIAN_BOM
The 32-bit byte-order-mark indicates the native byte order of the host machine.
Little-endian machines will contain the value `0x04030201`. Big-endian machines will contain
the value `0x01020304`.
"""
const ENDIAN_BOM = reinterpret(UInt32,UInt8[1:4;])[1]

if ENDIAN_BOM == 0x01020304
ntoh(x) = x
hton(x) = x
ltoh(x) = bswap(x)
htol(x) = bswap(x)
elseif ENDIAN_BOM == 0x04030201
ntoh(x) = bswap(x)
hton(x) = bswap(x)
ltoh(x) = x
htol(x) = x
else
error("seriously? what is this machine?")
end

ENDIAN_BOM

"""
ntoh(x)
Expand Down
8 changes: 4 additions & 4 deletions base/pcre.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,18 @@ const OPTIONS_MASK = COMPILE_MASK | EXECUTE_MASK

const UNSET = ~Csize_t(0) # Indicates that an output vector element is unset

function info(regex::Ptr{Void}, what::Integer, T)
buf = zeros(UInt8,sizeof(T))
function info(regex::Ptr{Void}, what::Integer, ::Type{T}) where T
buf = Ref{T}()
ret = ccall((:pcre2_pattern_info_8, PCRE_LIB), Int32,
(Ptr{Void}, Int32, Ptr{UInt8}),
(Ptr{Void}, Int32, Ptr{Void}),
regex, what, buf)
if ret != 0
error(ret == ERROR_NULL ? "NULL regex object" :
ret == ERROR_BADMAGIC ? "invalid regex object" :
ret == ERROR_BADOPTION ? "invalid option flags" :
"unknown error $ret")
end
reinterpret(T,buf)[1]
buf[]
end

function get_ovec(match_data)
Expand Down
42 changes: 33 additions & 9 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,19 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
a->offset = 0;
a->data = NULL;
a->flags.isaligned = data->flags.isaligned;
jl_array_t *owner = (jl_array_t*)jl_array_owner(data);
jl_value_t *el_type = jl_tparam0(atype);
assert(store_unboxed(el_type) == !data->flags.ptrarray);
if (!data->flags.ptrarray) {
a->elsize = jl_datatype_size(el_type);
unsigned align = ((jl_datatype_t*)el_type)->layout->alignment;
jl_value_t *ownerty = jl_typeof(owner);
unsigned oldalign = (ownerty == (jl_value_t*)jl_string_type ? 1 :
((jl_datatype_t*)jl_tparam0(ownerty))->layout->alignment);
if (oldalign < align)
jl_exceptionf(jl_argumenterror_type,
"reinterpret from alignment %u bytes to alignment %u bytes not allowed",
oldalign, align);
a->flags.ptrarray = 0;
}
else {
Expand All @@ -201,7 +210,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,

// if data is itself a shared wrapper,
// owner should point back to the original array
jl_array_data_owner(a) = jl_array_owner(data);
jl_array_data_owner(a) = (jl_value_t*)owner;

a->flags.how = 3;
a->data = data->data;
Expand Down Expand Up @@ -266,15 +275,22 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
size_t nel, int own_buffer)
{
jl_ptls_t ptls = jl_get_ptls_states();
size_t elsz;
jl_array_t *a;
jl_value_t *el_type = jl_tparam0(atype);

int isunboxed = store_unboxed(el_type);
if (isunboxed)
size_t elsz;
unsigned align;
if (isunboxed) {
elsz = jl_datatype_size(el_type);
else
elsz = sizeof(void*);
align = ((jl_datatype_t*)el_type)->layout->alignment;
}
else {
align = elsz = sizeof(void*);
}
if (((uintptr_t)data) & (align - 1))
jl_exceptionf(jl_argumenterror_type,
"unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);

int ndimwords = jl_array_ndimwords(1);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
Expand Down Expand Up @@ -309,7 +325,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
jl_value_t *_dims, int own_buffer)
{
jl_ptls_t ptls = jl_get_ptls_states();
size_t elsz, nel = 1;
size_t nel = 1;
jl_array_t *a;
size_t ndims = jl_nfields(_dims);
wideint_t prod;
Expand All @@ -326,10 +342,18 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
jl_value_t *el_type = jl_tparam0(atype);

int isunboxed = store_unboxed(el_type);
if (isunboxed)
size_t elsz;
unsigned align;
if (isunboxed) {
elsz = jl_datatype_size(el_type);
else
elsz = sizeof(void*);
align = ((jl_datatype_t*)el_type)->layout->alignment;
}
else {
align = elsz = sizeof(void*);
}
if (((uintptr_t)data) & (align - 1))
jl_exceptionf(jl_argumenterror_type,
"unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);

int ndimwords = jl_array_ndimwords(ndims);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
Expand Down
34 changes: 31 additions & 3 deletions test/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,15 @@ let
@test aa == a
aa = unsafe_wrap(Array, pointer(a), UInt16(length(a)))
@test aa == a
aaa = unsafe_wrap(Array, pointer(a), (1, 1))
@test size(aaa) == (1, 1)
@test aaa[1] == a[1]
@test_throws InexactError unsafe_wrap(Array, pointer(a), -3)
# Misaligned pointer
res = @test_throws ArgumentError unsafe_wrap(Array, pointer(a) + 1, length(a))
@test contains(res.value.msg, "is not properly aligned to $(sizeof(Int)) bytes")
res = @test_throws ArgumentError unsafe_wrap(Array, pointer(a) + 1, (1, 1))
@test contains(res.value.msg, "is not properly aligned to $(sizeof(Int)) bytes")
end

struct FooBar2515
Expand Down Expand Up @@ -4848,12 +4856,15 @@ end
let ni128 = sizeof(FP128test) ÷ sizeof(Int),
ns128 = sizeof(FP128align) ÷ sizeof(Int),
nbit = sizeof(Int) * 8,
arr = reinterpret(FP128align, collect(Int, 1:(2 * ns128))),
arr = Vector{FP128align}(2),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be larger than 2 on 32 bit?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. The original size is 2 * ns128 * sizeof(Int) ÷ sizeof(FP128align) == 2 * sizeof(FP128align) ÷ sizeof(Int) * sizeof(Int) ÷ sizeof(FP128align) == 2

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense – we want to allocate this with the larger alignment (FP128align), but fill it with the smaller alignment (Int)

offset = Base.datatype_alignment(FP128test) ÷ sizeof(Int),
little,
expected
expected,
arrint = reinterpret(Int, arr)

@test length(arrint) == 2 * ns128
arrint .= 1:(2 * ns128)
@test sizeof(FP128test) == 16
@test length(arr) == 2
@test arr[1].i == 1
@test arr[2].i == 1 + ns128
expected = UInt128(0)
Expand Down Expand Up @@ -4903,3 +4914,20 @@ mutable struct T21719{V}
end
g21719(f, goal; tol = 1e-6) = T21719(f, tol, goal)
@test isa(g21719(identity, 1.0; tol=0.1), T21719)

# reinterpret alignment requirement
let arr8 = zeros(UInt8, 16),
arr64 = zeros(UInt64, 2),
arr64_8 = reinterpret(UInt8, arr64),
arr64_i

# Not allowed to reinterpret arrays allocated as UInt8 array to a Int32 array
res = @test_throws ArgumentError reinterpret(Int32, arr8)
@test res.value.msg == "reinterpret from alignment 1 bytes to alignment 4 bytes not allowed"
# OK to reinterpret arrays allocated as UInt64 array to a Int64 array even though
# it is passed as a UInt8 array
arr64_i = reinterpret(Int64, arr64_8)
@test arr8 == arr64_8
arr64_i[2] = 1234
@test arr64[2] == 1234
end
23 changes: 23 additions & 0 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -678,3 +678,26 @@ let foo() = begin
end
@test foo() == 2
end

# Endian tests
# For now, we only support little endian.
# Add an `Sys.ARCH` test for big endian when/if we add support for that.
# Do **NOT** use `ENDIAN_BOM` to figure out the endianess
# since that's exactly what we want to test.
@test ENDIAN_BOM == 0x04030201
@test ntoh(0x1) == 0x1
@test hton(0x1) == 0x1
@test ltoh(0x1) == 0x1
@test htol(0x1) == 0x1
@test ntoh(0x102) == 0x201
@test hton(0x102) == 0x201
@test ltoh(0x102) == 0x102
@test htol(0x102) == 0x102
@test ntoh(0x1020304) == 0x4030201
@test hton(0x1020304) == 0x4030201
@test ltoh(0x1020304) == 0x1020304
@test htol(0x1020304) == 0x1020304
@test ntoh(0x102030405060708) == 0x807060504030201
@test hton(0x102030405060708) == 0x807060504030201
@test ltoh(0x102030405060708) == 0x102030405060708
@test htol(0x102030405060708) == 0x102030405060708