From 8bfef8f1b60155020abc9bdab8aef9788eb458ba Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 1 Aug 2024 17:11:07 +0000 Subject: [PATCH] codegen: take gc roots (and alloca alignment) more seriously Due to limitations in the LLVM implementation, we are forced to emit fairly bad code here. But we need to make sure it is still correct with respect to GC rooting. The PR 50833c84d454ef989797e035294ba27b3cca79b7 also changed the meaning of haspadding without changing all of the existing users to use the new equivalent flag (haspadding || !isbitsegal), incurring additional breakage here as well and needing more tests for that. Fixes #54720 --- src/abi_aarch64.cpp | 8 ++-- src/abi_arm.cpp | 2 +- src/abi_ppc64le.cpp | 2 +- src/cgutils.cpp | 97 ++++++++++++++++++++++++++++----------------- src/codegen.cpp | 3 ++ src/datatype.c | 14 +++---- test/atomics.jl | 6 +++ 7 files changed, 83 insertions(+), 49 deletions(-) diff --git a/src/abi_aarch64.cpp b/src/abi_aarch64.cpp index 7c31b6606139a..0a193ee132556 100644 --- a/src/abi_aarch64.cpp +++ b/src/abi_aarch64.cpp @@ -16,7 +16,7 @@ struct ABI_AArch64Layout : AbiLayout { Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) - // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0` + // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields > 0` if (dt->layout == NULL || jl_is_layout_opaque(dt->layout)) return nullptr; size_t nfields = dt->layout->nfields; @@ -62,7 +62,7 @@ Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) - // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0` + // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields == 0` Type *lltype; // Check size first since it's cheaper. switch (jl_datatype_size(dt)) { @@ -88,7 +88,7 @@ Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) - if (dt->name->mutabl || dt->layout->npointers || dt->layout->flags.haspadding) + if (dt->name->mutabl || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding) return nullptr; return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx); } @@ -184,7 +184,7 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const // uniquely addressable members. // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector) size_t dsz = jl_datatype_size(dt); - if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->flags.haspadding) + if (dsz > 64 || !dt->layout || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding) return NULL; nele = 0; ElementType eltype; diff --git a/src/abi_arm.cpp b/src/abi_arm.cpp index 68f980d7b40da..8839a37da6e13 100644 --- a/src/abi_arm.cpp +++ b/src/abi_arm.cpp @@ -82,7 +82,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const if (jl_is_structtype(dt)) { // Fast path checks before descending the type hierarchy // (4 x 128b vector == 64B max size) - if (jl_datatype_size(dt) > 64 || dt->layout->npointers || dt->layout->flags.haspadding) + if (jl_datatype_size(dt) > 64 || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding) return 0; base = NULL; diff --git a/src/abi_ppc64le.cpp b/src/abi_ppc64le.cpp index 1f10817cfeeee..f02e1022ddc2d 100644 --- a/src/abi_ppc64le.cpp +++ b/src/abi_ppc64le.cpp @@ -44,7 +44,7 @@ struct ABI_PPC64leLayout : AbiLayout { // count the homogeneous floating aggregate size (saturating at max count of 8) unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const { - if (jl_datatype_size(ty) > 128 || ty->layout->npointers || ty->layout->flags.haspadding) + if (jl_datatype_size(ty) > 128 || ty->layout->npointers || !ty->layout->flags.isbitsegal || ty->layout->flags.haspadding) return 9; size_t i, l = ty->layout->nfields; diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 297fcb164b112..08d51f52b613b 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2119,12 +2119,15 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, FailOrder = AtomicOrdering::Monotonic; unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype); AllocaInst *intcast = nullptr; + Type *intcast_eltyp = nullptr; + bool tracked_pointers = isboxed || CountTrackedPointers(elty).count > 0; if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) { + intcast_eltyp = elty; + elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); if (!issetfield) { intcast = emit_static_alloca(ctx, elty); setName(ctx.emission_context, intcast, "atomic_store_box"); } - elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } Type *realelty = elty; if (Order != AtomicOrdering::NotAtomic && isa(elty)) { @@ -2133,14 +2136,20 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2); } Value *r = nullptr; - if (issetfield || isswapfield || isreplacefield || issetfieldonce) { - if (isboxed) + if (issetfield || isswapfield || isreplacefield || issetfieldonce) { // e.g. !ismodifyfield + assert(isboxed || rhs.typ == jltype); + if (isboxed) { r = boxed(ctx, rhs); - else if (aliasscope || Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) { + } + else if (intcast) { + emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign()); + r = ctx.builder.CreateLoad(realelty, intcast); + } + else if (aliasscope || Order != AtomicOrdering::NotAtomic || tracked_pointers) { r = emit_unbox(ctx, realelty, rhs, jltype); - if (realelty != elty) - r = ctx.builder.CreateZExt(r, elty); } + if (realelty != elty) + r = ctx.builder.CreateZExt(r, elty); } if (isboxed) alignment = sizeof(void*); @@ -2222,7 +2231,14 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, Current->addIncoming(instr, SkipBB); ctx.builder.SetInsertPoint(BB); } - Compare = emit_unbox(ctx, realelty, cmp, jltype); + cmp = update_julia_type(ctx, cmp, jltype); + if (intcast) { + emit_unbox_store(ctx, cmp, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign()); + Compare = ctx.builder.CreateLoad(realelty, intcast); + } + else { + Compare = emit_unbox(ctx, realelty, cmp, jltype); + } if (realelty != elty) Compare = ctx.builder.CreateZExt(Compare, elty); } @@ -2269,16 +2285,17 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (realelty != elty) realCompare = ctx.builder.CreateTrunc(realCompare, realelty); if (intcast) { + assert(!isboxed); ctx.builder.CreateStore(realCompare, intcast); - if (maybe_null_if_boxed) - realCompare = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast); + if (tracked_pointers) + realCompare = ctx.builder.CreateLoad(intcast_eltyp, intcast); } - if (maybe_null_if_boxed) { - Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare); - if (first_ptr) - null_load_check(ctx, first_ptr, mod, var); + if (maybe_null_if_boxed && tracked_pointers) { + Value *first_ptr = isboxed ? realCompare : extract_first_ptr(ctx, realCompare); + assert(first_ptr); + null_load_check(ctx, first_ptr, mod, var); } - if (intcast) + if (intcast && !tracked_pointers) oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack); else oldval = mark_julia_type(ctx, realCompare, isboxed, jltype); @@ -2286,11 +2303,18 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (isboxed) { r = boxed(ctx, rhs); } - else if (Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) { + else if (intcast) { + emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign()); + r = ctx.builder.CreateLoad(realelty, intcast); + if (!tracked_pointers) // oldval is a slot, so put the oldval back + ctx.builder.CreateStore(realCompare, intcast); + } + else if (Order != AtomicOrdering::NotAtomic) { + assert(!tracked_pointers); r = emit_unbox(ctx, realelty, rhs, jltype); - if (realelty != elty) - r = ctx.builder.CreateZExt(r, elty); } + if (realelty != elty) + r = ctx.builder.CreateZExt(r, elty); if (needlock) emit_lockstate_value(ctx, needlock, true); cmp = oldval; @@ -2356,9 +2380,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, realinstr = ctx.builder.CreateTrunc(realinstr, realelty); if (intcast) { ctx.builder.CreateStore(realinstr, intcast); + // n.b. this oldval is only used for emit_f_is in this branch, so we know a priori that it does not need a gc-root oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack); if (maybe_null_if_boxed) - realinstr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast); + realinstr = ctx.builder.CreateLoad(intcast_eltyp, intcast); } else { oldval = mark_julia_type(ctx, realinstr, isboxed, jltype); @@ -2398,20 +2423,23 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, ctx.builder.SetInsertPoint(DoneBB); if (needlock) emit_lockstate_value(ctx, needlock, false); - if (parent != NULL) { + if (parent != NULL && r && tracked_pointers && (!isboxed || !type_is_permalloc(rhs.typ))) { if (isreplacefield || issetfieldonce) { - // TODO: avoid this branch if we aren't making a write barrier BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f); DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f); ctx.builder.CreateCondBr(Success, BB, DoneBB); ctx.builder.SetInsertPoint(BB); } - if (r) { - if (!isboxed) - emit_write_multibarrier(ctx, parent, r, rhs.typ); - else if (!type_is_permalloc(rhs.typ)) - emit_write_barrier(ctx, parent, r); + if (realelty != elty) + r = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, r, realelty)); + if (intcast) { + ctx.builder.CreateStore(r, intcast); + r = ctx.builder.CreateLoad(intcast_eltyp, intcast); } + if (!isboxed) + emit_write_multibarrier(ctx, parent, r, rhs.typ); + else if (!type_is_permalloc(rhs.typ)) + emit_write_barrier(ctx, parent, r); if (isreplacefield || issetfieldonce) { ctx.builder.CreateBr(DoneBB); ctx.builder.SetInsertPoint(DoneBB); @@ -2430,21 +2458,18 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty)); if (intcast) { ctx.builder.CreateStore(instr, intcast); - instr = nullptr; + if (tracked_pointers) + instr = ctx.builder.CreateLoad(intcast_eltyp, intcast); } - if (maybe_null_if_boxed) { - if (intcast) - instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast); + if (maybe_null_if_boxed && tracked_pointers) { Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr); - if (first_ptr) - null_load_check(ctx, first_ptr, mod, var); - if (intcast && !first_ptr) - instr = nullptr; + assert(first_ptr); + null_load_check(ctx, first_ptr, mod, var); } - if (instr) - oldval = mark_julia_type(ctx, instr, isboxed, jltype); - else + if (intcast && !tracked_pointers) oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack); + else + oldval = mark_julia_type(ctx, instr, isboxed, jltype); if (isreplacefield) { Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext())); const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)}; diff --git a/src/codegen.cpp b/src/codegen.cpp index d8d7a1fd23814..ef18d9bd1b673 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -3374,11 +3374,14 @@ static size_t emit_masked_bits_compare(callback &emit_desc, jl_datatype_t *aty, size_t padding_bytes = 0; size_t nfields = jl_datatype_nfields(aty); size_t total_size = jl_datatype_size(aty); + assert(aty->layout->flags.isbitsegal); for (size_t i = 0; i < nfields; ++i) { size_t offset = jl_field_offset(aty, i); size_t fend = i == nfields - 1 ? total_size : jl_field_offset(aty, i + 1); size_t fsz = jl_field_size(aty, i); jl_datatype_t *fty = (jl_datatype_t*)jl_field_type(aty, i); + assert(jl_is_datatype(fty)); // union fields should never reach here + assert(fty->layout->flags.isbitsegal); if (jl_field_isptr(aty, i) || !fty->layout->flags.haspadding) { // The field has no internal padding data_bytes += fsz; diff --git a/src/datatype.c b/src/datatype.c index e7ee15a63f56e..e88977dd67400 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -1250,7 +1250,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre- } else if (nb == 1) { uint8_t *y8 = (uint8_t*)y; - assert(!dt->layout->flags.haspadding); + assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding); if (dt == et) { *y8 = *(uint8_t*)expected; uint8_t z8 = *(uint8_t*)src; @@ -1263,7 +1263,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre- } else if (nb == 2) { uint16_t *y16 = (uint16_t*)y; - assert(!dt->layout->flags.haspadding); + assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding); if (dt == et) { *y16 = *(uint16_t*)expected; uint16_t z16 = *(uint16_t*)src; @@ -1281,7 +1281,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre- uint32_t z32 = zext_read32(src, nb); while (1) { success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32); - if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt)) + if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt)) break; } } @@ -1298,7 +1298,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre- uint64_t z64 = zext_read64(src, nb); while (1) { success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64); - if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt)) + if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt)) break; } } @@ -1316,7 +1316,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre- jl_uint128_t z128 = zext_read128(src, nb); while (1) { success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128); - if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt)) + if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt)) break; } } @@ -2010,7 +2010,7 @@ inline jl_value_t *modify_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_ else { char *px = lock(p, parent, needlock, isatomic); int success = memcmp(px, (char*)r, fsz) == 0; - if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding) + if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding)) success = jl_egal__bits((jl_value_t*)px, r, (jl_datatype_t*)rty); if (success) { if (isunion) { @@ -2125,7 +2125,7 @@ inline jl_value_t *replace_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value success = (rty == jl_typeof(expected)); if (success) { success = memcmp((char*)r, (char*)expected, rsz) == 0; - if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding) + if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding)) success = jl_egal__bits(r, expected, (jl_datatype_t*)rty); } *((uint8_t*)r + fsz) = success ? 1 : 0; diff --git a/test/atomics.jl b/test/atomics.jl index 3df9e7d0f63c0..165c8dc4e2dfc 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -129,6 +129,7 @@ test_field_operators(ARefxy{Any}(123_10, 123_20)) test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing)) test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20)) test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20)) +test_field_operators(ARefxy{Complex{Real}}(123_10, 123_20)) test_field_operators(ARefxy{PadIntA}(123_10, 123_20)) test_field_operators(ARefxy{PadIntB}(123_10, 123_20)) #FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20)) @@ -317,6 +318,7 @@ test_field_orderings(ARefxy{Any}(true, false), true, false) test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing) test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1) test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40)) +test_field_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40)) test_field_orderings(10.0, 20.0) test_field_orderings(NaN, Inf) @@ -568,6 +570,7 @@ test_global_operators(Any) test_global_operators(Union{Nothing,Int}) test_global_operators(Complex{Int32}) test_global_operators(Complex{Int128}) +test_global_operators(Complex{Real}) test_global_operators(PadIntA) test_global_operators(PadIntB) #FIXME: test_global_operators(Int24) @@ -691,6 +694,7 @@ test_global_orderings(Any, true, false) test_global_orderings(Union{Nothing,Missing}, nothing, missing) test_global_orderings(Union{Nothing,Int}, nothing, 123_1) test_global_orderings(Complex{Int128}, Complex{Int128}(10, 30), Complex{Int128}(20, 40)) +test_global_orderings(Complex{Real}, Complex{Real}(10, 30), Complex{Real}(20, 40)) test_global_orderings(Float64, 10.0, 20.0) test_global_orderings(Float64, NaN, Inf) @@ -844,6 +848,7 @@ test_memory_operators(Any) test_memory_operators(Union{Nothing,Int}) test_memory_operators(Complex{Int32}) test_memory_operators(Complex{Int128}) +test_memory_operators(Complex{Real}) test_memory_operators(PadIntA) test_memory_operators(PadIntB) #FIXME: test_memory_operators(Int24) @@ -1031,6 +1036,7 @@ test_memory_orderings(Any, true, false) test_memory_orderings(Union{Nothing,Missing}, nothing, missing) test_memory_orderings(Union{Nothing,Int}, nothing, 123_1) test_memory_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40)) +test_memory_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40)) test_memory_orderings(10.0, 20.0) test_memory_orderings(NaN, Inf)