Skip to content

Commit

Permalink
Support aarch64
Browse files Browse the repository at this point in the history
  • Loading branch information
dinosaure committed Dec 6, 2021
1 parent 2e1f6d7 commit b9996f9
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 30 deletions.
5 changes: 4 additions & 1 deletion conf/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ cpuid(unsigned func, unsigned subfunc, unsigned cpuinfo[4])
__cpuidex(cpuinfo, func, subfunc)
}
#else
#error Unsupported compiler
// XXX(dinosaure): [aarch64] branch
static inline void
cpuid(unsigned func, unsigned subfunc, unsigned cpuinfo[4])
{ }
#endif

#ifndef bit_CLFLUSH
Expand Down
35 changes: 24 additions & 11 deletions conf/flush.ml
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,30 @@ external is_cpu_clwb_present
: unit -> bool
= "is_cpu_clwb_present"

open Configurator.V1.C_define.Value

let __aarch64__ = "__aarch64__"

let _ =
let clflush = is_cpu_clflush_present () in
let clflushopt = is_cpu_clflushopt_present () in
let clwb = is_cpu_clwb_present () in
let c = Configurator.V1.create "sse" in
let defines = Configurator.V1.C_define.import
c ~includes:[] [ (__aarch64__, Switch) ] in
match List.assoc_opt __aarch64__ defines with
| Some (Switch true) ->
Format.printf "dc cvac: true\n%!" ;
let flags = [ "-DART_DC_CVAC" ] in
Configurator.V1.Flags.write_sexp "flush.sexp" flags
| _ ->
let clflush = is_cpu_clflush_present () in
let clflushopt = is_cpu_clflushopt_present () in
let clwb = is_cpu_clwb_present () in

Format.printf "clflush: %b\n%!" clflush ;
Format.printf "clflushopt: %b\n%!" clflushopt ;
Format.printf "clwb: %b\n%!" clwb ;
Format.printf "clflush: %b\n%!" clflush ;
Format.printf "clflushopt: %b\n%!" clflushopt ;
Format.printf "clwb: %b\n%!" clwb ;

let flags = [] in
let flags = if clflush then "-DART_CLFLUSH" :: flags else flags in
let flags = if clflushopt then "-DART_CLFLUSHOPT" :: flags else flags in
let flags = if clwb then "-DART_CLWB" :: flags else flags in
Configurator.V1.Flags.write_sexp "flush.sexp" flags
let flags = [] in
let flags = if clflush then "-DART_CLFLUSH" :: flags else flags in
let flags = if clflushopt then "-DART_CLFLUSHOPT" :: flags else flags in
let flags = if clwb then "-DART_CLWB" :: flags else flags in
Configurator.V1.Flags.write_sexp "flush.sexp" flags
60 changes: 60 additions & 0 deletions lib/persistent.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <caml/bigarray.h>
#include <caml/memory.h>
#include <caml/address_class.h>
#include <assert.h>

#if defined(HAS_STDATOMIC_H)
#include <stdatomic.h>
Expand All @@ -24,6 +25,9 @@ typedef enum memory_order {
#error "C11 atomics are unavailable on this platform."
#endif

#define is_aligned(ptr, byte_count) \
(((uintptr_t)(const void *)(ptr)) % (byte_count) == 0)

#define memory_uint8_off(src, off) \
((uint8_t *) ((uint8_t *) Caml_ba_data_val (src) + Unsigned_long_val (off)))

Expand Down Expand Up @@ -70,6 +74,9 @@ caml_atomic_set_uint8(value memory, value addr, value v)
CAMLprim value
caml_atomic_get_leuintnat(value memory, value addr)
{
#if defined(__aarch64__)
assert(is_aligned(memory_uintnat_off (memory, addr), sizeof(uintnat)));
#endif
uintnat v = atomic_load_explicit(memory_uintnat_off (memory, addr), memory_order_seq_cst) ;
#if defined(ART_BIG_ENDIAN) && defined(__ARCH_SIXTYFOUR)
v = __bswap_64 (v) ;
Expand All @@ -87,6 +94,9 @@ caml_atomic_set_leuintnat(value memory, value addr, value v)
x = __bswap_64 (x);
#elif defined(ART_BIG_ENDIAN)
x = __bswap_32 (x) ;
#endif
#if defined(__aarch64__)
assert(is_aligned(memory_uintnat_off (memory, addr), sizeof(uintnat)));
#endif
atomic_store_explicit(memory_uintnat_off (memory, addr), x, memory_order_seq_cst) ;
return Val_unit ;
Expand All @@ -95,6 +105,9 @@ caml_atomic_set_leuintnat(value memory, value addr, value v)
CAMLprim value
caml_atomic_get_leuint16(value memory, value addr)
{
#if defined(__aarch64__)
assert(is_aligned(memory_uintnat_off (memory, addr), sizeof(uint16_t)));
#endif
uint16_t v = atomic_load_explicit(memory_uint16_off (memory, addr), memory_order_seq_cst) ;
#if defined(ART_BIG_ENDIAN)
v = __bswap_16 (v) ;
Expand All @@ -108,6 +121,9 @@ caml_atomic_set_leuint16(value memory, value addr, value v)
uint16_t x = Unsigned_long_val (v) ;
#if defined(ART_BIG_ENDIAN)
x = __bswap_16 (x) ;
#endif
#if defined(__aarch64__)
assert(is_aligned(memory_uintnat_off (memory, addr), sizeof(uint16_t)));
#endif
atomic_store_explicit(memory_uint16_off (memory, addr), x, memory_order_seq_cst) ;
return Val_unit ;
Expand All @@ -116,6 +132,9 @@ caml_atomic_set_leuint16(value memory, value addr, value v)
CAMLprim value
caml_atomic_get_leuint31(value memory, value addr)
{
#if defined(__aarch64__)
assert(is_aligned(memory_uintnat_off (memory, addr), sizeof(uint32_t)));
#endif
uint32_t v = atomic_load_explicit(memory_uint32_off (memory, addr), memory_order_seq_cst) ;
#if defined(ART_BIG_ENDIAN)
v = __bswap_32 (v) ;
Expand All @@ -129,6 +148,9 @@ caml_atomic_set_leuint31(value memory, value addr, value v)
uint32_t x = Unsigned_long_val (v) ;
#if defined(ART_BIG_ENDIAN)
x = __bswap_32 (x) ;
#endif
#if defined(__aarch64__)
assert(is_aligned(memory_uintnat_off (memory, addr), sizeof(uint32_t)));
#endif
atomic_store_explicit(memory_uint32_off (memory, addr), (x & 0x7fffffff), memory_order_seq_cst) ;
return Val_unit ;
Expand All @@ -137,6 +159,9 @@ caml_atomic_set_leuint31(value memory, value addr, value v)
uint64_t
caml_atomic_get_leuint64(value memory, value addr)
{
#if defined(__aarch64__)
assert(is_aligned(memory_uintnat_off (memory, addr), sizeof(uint64_t)));
#endif
uint64_t v = atomic_load_explicit(memory_uint64_off (memory, addr), memory_order_seq_cst) ;
#if defined(ART_BIG_ENDIAN)
v = __bswap_64 (v) ;
Expand All @@ -149,6 +174,9 @@ caml_atomic_set_leuint64(value memory, value addr, uint64_t x)
{
#if defined(ART_BIG_ENDIAN)
x = __bswap_64 (x) ;
#endif
#if defined(__aarch64__)
assert(is_aligned(memory_uintnat_off (memory, addr), sizeof(uint64_t)));
#endif
atomic_store_explicit(memory_uint64_off (memory, addr), x, memory_order_seq_cst) ;
return Val_unit ;
Expand Down Expand Up @@ -290,6 +318,13 @@ caml_get_leintnat(value memory, value addr)
#endif
}

/* XXX(dinosaure): instr. below should appears only
* according to [ART_{CLWB,CLFLUSHOPT,CLFLUSH,DC_CVAC}]
* and the architecture of the host (target?) system.
*
* Currently, [ART_CLWB] can appear even if we are on
* [__arch64__] architecture which is wrong. */

#ifdef ART_CLWB
void clwb(const void *ptr) {
asm volatile ("clwb %0" : "+m" (ptr));
Expand All @@ -314,6 +349,31 @@ caml_persist(value memory, value addr, value len)
sfence();
return Val_unit ;
}
#elif ART_DC_CVAC
void dc_cvac(const void *ptr) {
asm volatile("dc cvac, %0" :: "r" (ptr) : "memory");
}

void dc_cvac_range(const void *ptr, uint64_t len) {
uintptr_t start = (uintptr_t) ptr & ~(64 - 1);
// XXX(dinosaure): assume cache-line = 64 on aarch64
for (; start < (uintptr_t)ptr + len; start += 64) {
dc_cvac((void *) start);
}
}

void sfence() {
asm volatile("dmb ishst" ::: "memory");
}

CAMLprim value
caml_persist(value memory, value addr, value len)
{
sfence();
dc_cvac_range(memory_uint8_off (memory, addr), Long_val (len));
sfence();
return Val_unit ;
}
#elif ART_CLFLUSHOPT
void clflushopt(const void *ptr) {
asm volatile ("clflushopt %0" : "+m" (ptr));
Expand Down
51 changes: 34 additions & 17 deletions lib/rowex.ml
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,10 @@ let _header_compact_count = _header_count + 2

let _header_length = _header_compact_count + 2

let () = match Sys.word_size = 64 with
| true -> assert (_header_length = 32)
| false -> assert (_header_length = 24)

let _bits_kind = Sys.word_size - 3

let _n4_kind = 0b00
Expand Down Expand Up @@ -394,22 +398,27 @@ module Make (S : S) = struct

(**** FIND CHILD ****)

let _n4_align_length = match Sys.word_size = 64 with
| true -> 64 - (_header_length + 4)
| false -> 32 - (_header_length + 4)
(* TODO(dinosaure): check the 32-bits branch. *)

let n4_find_child addr k =
let* _0 = atomic_get Addr.(addr + _header_length + 0) Value.int8 in
if _0 = k
then atomic_get Addr.(addr + _header_length + 4 + (Addr.length * 0))
then atomic_get Addr.(addr + _header_length + 4 + _n4_align_length + (Addr.length * 0))
Value.addr_rd else
let* _1 = atomic_get Addr.(addr + _header_length + 1) Value.int8 in
if _1 = k
then atomic_get Addr.(addr + _header_length + 4 + (Addr.length * 1))
then atomic_get Addr.(addr + _header_length + 4 + _n4_align_length + (Addr.length * 1))
Value.addr_rd else
let* _2 = atomic_get Addr.(addr + _header_length + 2) Value.int8 in
if _2 = k
then atomic_get Addr.(addr + _header_length + 4 + (Addr.length * 2))
then atomic_get Addr.(addr + _header_length + 4 + _n4_align_length + (Addr.length * 2))
Value.addr_rd else
let* _3 = atomic_get Addr.(addr + _header_length + 3) Value.int8 in
if _3 = k
then atomic_get Addr.(addr + _header_length + 4 + (Addr.length * 3))
then atomic_get Addr.(addr + _header_length + 4 + _n4_align_length + (Addr.length * 3))
Value.addr_rd else
( Log.debug (fun m -> m "No child for %02x into N4" k)
; return Addr.null )
Expand Down Expand Up @@ -470,7 +479,7 @@ module Make (S : S) = struct
(succ idx) max
[@@inline]

let n4_any_child addr = _node_any_child addr ~header:4 Addr.null 0 4
let n4_any_child addr = _node_any_child addr ~header:(4 + _n4_align_length) Addr.null 0 4
let n16_any_child addr = _node_any_child addr ~header:16 Addr.null 0 16
let n48_any_child addr = _node_any_child addr ~header:256 Addr.null 0 48
let n256_any_child addr = _node_any_child addr ~header:0 Addr.null 0 256
Expand Down Expand Up @@ -581,7 +590,8 @@ module Make (S : S) = struct
fprintf ppf "{:leaf @[<hov>key= %S;@ value= %d;@] }" key value
else
let* n, header = get_type addr >>| function
| 0 -> 4, 4 | 1 -> 16, 16 | 2 -> 48, 256 | _ -> 256, 0 in
| 0 -> 4, 4 + _n4_align_length
| 1 -> 16, 16 | 2 -> 48, 256 | _ -> 256, 0 in
let* () = fprintf ppf "{:node @[<hov>hdr= @[<hov>" in
let* () = pp_record ppf addr in
let* () = fprintf ppf "@];@ key= @[<hov>" in
Expand Down Expand Up @@ -820,7 +830,10 @@ module Make (S : S) = struct

[@@@warning "+37"]

let _sizeof_n4 = _header_length + 4 + (4 * Addr.length)
let _sizeof_n4 = match Sys.word_size = 64 with
| true -> _header_length + 4 + 28 (* align *) + (4 * Addr.length)
| false -> _header_length + 4 + (4 * Addr.length) (* TODO *)

let _sizeof_n16 = _header_length + 16 + (16 * Addr.length)
let _sizeof_n48 = _header_length + 256 + (48 * Addr.length)
let _sizeof_n256 = _header_length + (256 * Addr.length)
Expand Down Expand Up @@ -933,7 +946,7 @@ module Make (S : S) = struct
Value.int8 k in
let* () = persist addr ~len:_sizeof_n4 in
let* () = atomic_set
Addr.(to_wronly (addr + _header_length + 4
Addr.(to_wronly (addr + _header_length + 4 + _n4_align_length
+ (compact_count * Addr.length)))
Value.addr_rd value in
let* () = persist
Expand All @@ -946,7 +959,7 @@ module Make (S : S) = struct
Addr.(addr + _header_length + compact_count)
Value.int8 k in
let* () = atomic_set
Addr.(addr + _header_length + 4 + (compact_count * Addr.length))
Addr.(addr + _header_length + 4 + _n4_align_length + (compact_count * Addr.length))
Value.addr_rd value in
return true

Expand Down Expand Up @@ -1012,9 +1025,9 @@ module Make (S : S) = struct
then
let* key = atomic_get Addr.(addr + _header_length + i) Value.int8 in
let* child = atomic_get
Addr.(addr + _header_length + 4 + (i * Addr.length)) Value.addr_rd in
Addr.(addr + _header_length + 4 + _n4_align_length + (i * Addr.length)) Value.addr_rd in
if not (Addr.is_null child) && key = k
then atomic_set Addr.(addr + _header_length + 4 + (i * Addr.length))
then atomic_set Addr.(addr + _header_length + 4 + _n4_align_length + (i * Addr.length))
Value.addr_rd ptr
else _n4_update_child addr k ptr (succ i)
else assert false (* XXX(dinosaure): impossible or integrity problem! *)
Expand Down Expand Up @@ -1076,6 +1089,10 @@ module Make (S : S) = struct

let _n4_ks = String.make 4 '\000'
let _n4_vs = String.concat "" (List.init 4 (const string_of_null_addr))
let _n4_align = match Sys.word_size = 64 with
| true -> String.make (64 - (_header_length + 4)) '\000'
| false -> String.make (32 - (_header_length + 4)) '\000'
(* TODO(dinosaure): check the 32-bits branch. *)

let alloc_n4 ~prefix:p ~prefix_count ~level =
let prefix = Bytes.make 4 '\000' in
Expand All @@ -1086,7 +1103,7 @@ module Make (S : S) = struct
let l = leint31_to_string level in
allocate ~kind:`Node
[ Bytes.unsafe_to_string prefix; prefix_count; k; o; l; _count
; _compact_count; _n4_ks; _n4_vs ]
; _compact_count; _n4_ks; _n4_align; _n4_vs ]
~len:_sizeof_n4 >>| n4

let _n16_ks = String.make 16 '\000'
Expand Down Expand Up @@ -1139,7 +1156,7 @@ module Make (S : S) = struct
if i = compact_count
then return ()
else
let* value = atomic_get Addr.(n4 + _header_length + 4 + (i * Addr.length))
let* value = atomic_get Addr.(n4 + _header_length + 4 + _n4_align_length + (i * Addr.length))
Value.addr_rd in
match Addr.is_null value with
| true -> _copy_n4_into_n16 ~compact_count n4 n16 (succ i)
Expand Down Expand Up @@ -1168,7 +1185,7 @@ module Make (S : S) = struct
if i = 4
then return ()
else
let* value = atomic_get Addr.(nx + _header_length + 4 + (i * Addr.length))
let* value = atomic_get Addr.(nx + _header_length + 4 + _n4_align_length + (i * Addr.length))
Value.addr_rd in
match Addr.is_null value with
| true -> _copy_n4_into_n4 nx ny (succ i)
Expand Down Expand Up @@ -1271,7 +1288,7 @@ module Make (S : S) = struct
let* () = write_unlock p in
let* () = write_unlock_and_obsolete addr in
let* uid = atomic_get Addr.(addr + _header_owner) Value.leintnat in
collect addr ~len:(_header_length + 4 + (Addr.length * 4)) ~uid )
collect addr ~len:(_header_length + 4 + _n4_align_length + (Addr.length * 4)) ~uid )

let _insert_grow_n16_n48 (N16 addr as n16) p k kp value need_to_restart =
let* inserted = add_child_n16 n16 k value true in
Expand Down Expand Up @@ -1328,15 +1345,15 @@ module Make (S : S) = struct
(* XXX(dinosaure): assert (_ = true); *)
let* () = write_lock_or_restart p need_to_restart in
if !need_to_restart
then ( let* () = delete addr' (_header_length + 4 + (Addr.length * 4)) in
then ( let* () = delete addr' (_header_length + 4 + _n4_align_length + (Addr.length * 4)) in
write_unlock addr )
else
let* () = persist addr' ~len:_sizeof_n4 in
let* () = update_child p kp (Addr.to_rdonly addr') in
let* () = write_unlock p in
let* () = write_unlock_and_obsolete addr in
let* uid = atomic_get Addr.(addr + _header_owner) Value.leintnat in
collect addr ~len:(_header_length + 4 + (Addr.length * 4)) ~uid
collect addr ~len:(_header_length + 4 + _n4_align_length + (Addr.length * 4)) ~uid

let insert_compact_n16 (N16 addr as n16) p k kp value need_to_restart =
let* prefix, prefix_count = get_prefix addr in
Expand Down
2 changes: 1 addition & 1 deletion rowex.opam
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ depends: [
"crowbar" {>= "0.2" & with-test}
]
available:
arch != "ppc64" & arch != "arm32" & arch != "arm64" & arch != "x86_32" & arch != "s390x"
arch != "ppc64" & arch != "arm32" & arch != "x86_32" & arch != "s390x"
build: ["dune" "build" "-p" name "-j" jobs]
run-test: ["dune" "runtest" "-p" name "-j" jobs]
dev-repo: "git+https://github.com/dinosaure/art.git"

0 comments on commit b9996f9

Please sign in to comment.