JuliaLang · yuyichao · May 18, 2017 · May 12, 2017 · May 12, 2017 · rfourquet
diff --git a/base/bitarray.jl b/base/bitarray.jl
@@ -283,11 +283,12 @@ function copy_to_bitarray_chunks!(Bc::Vector{UInt64}, pos_d::Int, C::Array{Bool}
     nc8 = (nc >>> 3) << 3
     if nc8 > 0
         ind8 = 1
-        C8 = reinterpret(UInt64, unsafe_wrap(Array, pointer(C, ind), nc8 << 6))
+        P8 = Ptr{UInt64}(pointer(C, ind)) # unaligned i64 pointer
         @inbounds for i = 1:nc8
             c = UInt64(0)
             for j = 0:7
-                c |= (pack8bools(C8[ind8]) << (j<<3))
+                # unaligned load
+                c |= (pack8bools(unsafe_load(P8, ind8)) << (j<<3))
                 ind8 += 1
             end
             Bc[bind] = c

diff --git a/base/dSFMT.jl b/base/dSFMT.jl
@@ -89,9 +89,13 @@ end
 
 # dSFMT jump
 function dsfmt_jump(s::DSFMT_state, jp::AbstractString)
-    index = s.val[end-1]
-    work = zeros(UInt64, JN32>>1)
-    dsfmt = reinterpret(UInt64, copy(s.val))
+    val = s.val
+    nval = length(val)
+    index = val[nval - 1]
+    work = zeros(UInt64, JN32 >> 1)
+    dsfmt = Vector{UInt64}(nval >> 1)
+    ccall(:memcpy, Ptr{Void}, (Ptr{UInt64}, Ptr{Int32}, Csize_t),
+          dsfmt, val, (nval - 1) * sizeof(Int32))
     dsfmt[end] = UInt64(N*2)
 
     for c in jp

diff --git a/base/docs/helpdb/Base.jl b/base/docs/helpdb/Base.jl
@@ -1063,6 +1063,14 @@ For example,
 `reinterpret(Float32, UInt32(7))` interprets the 4 bytes corresponding to `UInt32(7)` as a
 `Float32`.
 
+!!! warning
+
+    It is not allowed to `reinterpret` an array to an element type with a larger alignment then
+    the alignment of the array. For a normal `Array`, this is the alignment of its element type.
+    For a reinterpreted array, this is the alignment of the `Array` it was reinterpreted from.
+    For example, `reinterpret(UInt32, UInt8[0, 0, 0, 0])` is not allowed but
+    `reinterpret(UInt32, reinterpret(UInt8, Float32[1.0]))` is allowed.
+
 ```jldoctest
 julia> reinterpret(Float32, UInt32(7))
 1.0f-44

diff --git a/base/io.jl b/base/io.jl
@@ -215,29 +215,33 @@ readlines(s=STDIN; chomp::Bool=true) = collect(eachline(s, chomp=chomp))
 
 ## byte-order mark, ntoh & hton ##
 
+let endian_boms = reinterpret(UInt8, UInt32[0x01020304])
+    global ntoh, hton, ltoh, htol
+    if endian_boms == UInt8[1:4;]
+        ntoh(x) = x
+        hton(x) = x
+        ltoh(x) = bswap(x)
+        htol(x) = bswap(x)
+        const global ENDIAN_BOM = 0x01020304
+    elseif endian_boms == UInt8[4:-1:1;]
+        ntoh(x) = bswap(x)
+        hton(x) = bswap(x)
+        ltoh(x) = x
+        htol(x) = x
+        const global ENDIAN_BOM = 0x04030201
+    else
+        error("seriously? what is this machine?")
+    end
+end
+
 """
     ENDIAN_BOM
 
 The 32-bit byte-order-mark indicates the native byte order of the host machine.
 Little-endian machines will contain the value `0x04030201`. Big-endian machines will contain
 the value `0x01020304`.
 """
-const ENDIAN_BOM = reinterpret(UInt32,UInt8[1:4;])[1]
-
-if ENDIAN_BOM == 0x01020304
-    ntoh(x) = x
-    hton(x) = x
-    ltoh(x) = bswap(x)
-    htol(x) = bswap(x)
-elseif ENDIAN_BOM == 0x04030201
-    ntoh(x) = bswap(x)
-    hton(x) = bswap(x)
-    ltoh(x) = x
-    htol(x) = x
-else
-    error("seriously? what is this machine?")
-end
-
+ENDIAN_BOM
 
 """
     ntoh(x)

diff --git a/base/pcre.jl b/base/pcre.jl
@@ -69,18 +69,18 @@ const OPTIONS_MASK = COMPILE_MASK | EXECUTE_MASK
 
 const UNSET = ~Csize_t(0)  # Indicates that an output vector element is unset
 
-function info(regex::Ptr{Void}, what::Integer, T)
-    buf = zeros(UInt8,sizeof(T))
+function info(regex::Ptr{Void}, what::Integer, ::Type{T}) where T
+    buf = Ref{T}()
     ret = ccall((:pcre2_pattern_info_8, PCRE_LIB), Int32,
-                (Ptr{Void}, Int32, Ptr{UInt8}),
+                (Ptr{Void}, Int32, Ptr{Void}),
                 regex, what, buf)
     if ret != 0
         error(ret == ERROR_NULL      ? "NULL regex object" :
               ret == ERROR_BADMAGIC  ? "invalid regex object" :
               ret == ERROR_BADOPTION ? "invalid option flags" :
                                        "unknown error $ret")
     end
-    reinterpret(T,buf)[1]
+    buf[]
 end
 
 function get_ovec(match_data)

diff --git a/src/array.c b/src/array.c
@@ -188,10 +188,19 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
     a->offset = 0;
     a->data = NULL;
     a->flags.isaligned = data->flags.isaligned;
+    jl_array_t *owner = (jl_array_t*)jl_array_owner(data);
     jl_value_t *el_type = jl_tparam0(atype);
     assert(store_unboxed(el_type) == !data->flags.ptrarray);
     if (!data->flags.ptrarray) {
         a->elsize = jl_datatype_size(el_type);
+        unsigned align = ((jl_datatype_t*)el_type)->layout->alignment;
+        jl_value_t *ownerty = jl_typeof(owner);
+        unsigned oldalign = (ownerty == (jl_value_t*)jl_string_type ? 1 :
+                             ((jl_datatype_t*)jl_tparam0(ownerty))->layout->alignment);
+        if (oldalign < align)
+            jl_exceptionf(jl_argumenterror_type,
+                          "reinterpret from alignment %u bytes to alignment %u bytes not allowed",
+                          oldalign, align);
         a->flags.ptrarray = 0;
     }
     else {
@@ -201,7 +210,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
 
     // if data is itself a shared wrapper,
     // owner should point back to the original array
-    jl_array_data_owner(a) = jl_array_owner(data);
+    jl_array_data_owner(a) = (jl_value_t*)owner;
 
     a->flags.how = 3;
     a->data = data->data;
@@ -266,15 +275,22 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
                                             size_t nel, int own_buffer)
 {
     jl_ptls_t ptls = jl_get_ptls_states();
-    size_t elsz;
     jl_array_t *a;
     jl_value_t *el_type = jl_tparam0(atype);
 
     int isunboxed = store_unboxed(el_type);
-    if (isunboxed)
+    size_t elsz;
+    unsigned align;
+    if (isunboxed) {
         elsz = jl_datatype_size(el_type);
-    else
-        elsz = sizeof(void*);
+        align = ((jl_datatype_t*)el_type)->layout->alignment;
+    }
+    else {
+        align = elsz = sizeof(void*);
+    }
+    if (((uintptr_t)data) & (align - 1))
+        jl_exceptionf(jl_argumenterror_type,
+                      "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
 
     int ndimwords = jl_array_ndimwords(1);
     int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
@@ -309,7 +325,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
                                          jl_value_t *_dims, int own_buffer)
 {
     jl_ptls_t ptls = jl_get_ptls_states();
-    size_t elsz, nel = 1;
+    size_t nel = 1;
     jl_array_t *a;
     size_t ndims = jl_nfields(_dims);
     wideint_t prod;
@@ -326,10 +342,18 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
     jl_value_t *el_type = jl_tparam0(atype);
 
     int isunboxed = store_unboxed(el_type);
-    if (isunboxed)
+    size_t elsz;
+    unsigned align;
+    if (isunboxed) {
         elsz = jl_datatype_size(el_type);
-    else
-        elsz = sizeof(void*);
+        align = ((jl_datatype_t*)el_type)->layout->alignment;
+    }
+    else {
+        align = elsz = sizeof(void*);
+    }
+    if (((uintptr_t)data) & (align - 1))
+        jl_exceptionf(jl_argumenterror_type,
+                      "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
 
     int ndimwords = jl_array_ndimwords(ndims);
     int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);

diff --git a/test/core.jl b/test/core.jl
@@ -952,7 +952,15 @@ let
     @test aa == a
     aa = unsafe_wrap(Array, pointer(a), UInt16(length(a)))
     @test aa == a
+    aaa = unsafe_wrap(Array, pointer(a), (1, 1))
+    @test size(aaa) == (1, 1)
+    @test aaa[1] == a[1]
     @test_throws InexactError unsafe_wrap(Array, pointer(a), -3)
+    # Misaligned pointer
+    res = @test_throws ArgumentError unsafe_wrap(Array, pointer(a) + 1, length(a))
+    @test contains(res.value.msg, "is not properly aligned to $(sizeof(Int)) bytes")
+    res = @test_throws ArgumentError unsafe_wrap(Array, pointer(a) + 1, (1, 1))
+    @test contains(res.value.msg, "is not properly aligned to $(sizeof(Int)) bytes")
 end
 
 struct FooBar2515
@@ -4848,12 +4856,15 @@ end
 let ni128 = sizeof(FP128test) ÷ sizeof(Int),
     ns128 = sizeof(FP128align) ÷ sizeof(Int),
     nbit = sizeof(Int) * 8,
-    arr = reinterpret(FP128align, collect(Int, 1:(2 * ns128))),
+    arr = Vector{FP128align}(2),
     offset = Base.datatype_alignment(FP128test) ÷ sizeof(Int),
     little,
-    expected
+    expected,
+    arrint = reinterpret(Int, arr)
+
+    @test length(arrint) == 2 * ns128
+    arrint .= 1:(2 * ns128)
     @test sizeof(FP128test) == 16
-    @test length(arr) == 2
     @test arr[1].i == 1
     @test arr[2].i == 1 + ns128
     expected = UInt128(0)
@@ -4903,3 +4914,20 @@ mutable struct T21719{V}
 end
 g21719(f, goal; tol = 1e-6) = T21719(f, tol, goal)
 @test isa(g21719(identity, 1.0; tol=0.1), T21719)
+
+# reinterpret alignment requirement
+let arr8 = zeros(UInt8, 16),
+    arr64 = zeros(UInt64, 2),
+    arr64_8 = reinterpret(UInt8, arr64),
+    arr64_i
+
+    # Not allowed to reinterpret arrays allocated as UInt8 array to a Int32 array
+    res = @test_throws ArgumentError reinterpret(Int32, arr8)
+    @test res.value.msg == "reinterpret from alignment 1 bytes to alignment 4 bytes not allowed"
+    # OK to reinterpret arrays allocated as UInt64 array to a Int64 array even though
+    # it is passed as a UInt8 array
+    arr64_i = reinterpret(Int64, arr64_8)
+    @test arr8 == arr64_8
+    arr64_i[2] = 1234
+    @test arr64[2] == 1234
+end
diff --git a/test/misc.jl b/test/misc.jl
@@ -678,3 +678,26 @@ let foo() = begin
     end
     @test foo() == 2
 end
+
+# Endian tests
+# For now, we only support little endian.
+# Add an `Sys.ARCH` test for big endian when/if we add support for that.
+# Do **NOT** use `ENDIAN_BOM` to figure out the endianess
+# since that's exactly what we want to test.
+@test ENDIAN_BOM == 0x04030201
+@test ntoh(0x1) == 0x1
+@test hton(0x1) == 0x1
+@test ltoh(0x1) == 0x1
+@test htol(0x1) == 0x1
+@test ntoh(0x102) == 0x201
+@test hton(0x102) == 0x201
+@test ltoh(0x102) == 0x102
+@test htol(0x102) == 0x102
+@test ntoh(0x1020304) == 0x4030201
+@test hton(0x1020304) == 0x4030201
+@test ltoh(0x1020304) == 0x1020304
+@test htol(0x1020304) == 0x1020304
+@test ntoh(0x102030405060708) == 0x807060504030201
+@test hton(0x102030405060708) == 0x807060504030201
+@test ltoh(0x102030405060708) == 0x102030405060708
+@test htol(0x102030405060708) == 0x102030405060708