diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 327ada8a072a..e97582b96e8e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -781,7 +781,7 @@ jobs: - uses: ./.github/actions/install-rust # Install OpenVINO - - uses: abrown/install-openvino-action@v8 + - uses: abrown/install-openvino-action@v9 if: runner.arch == 'X64' # Install WinML for testing wasi-nn WinML backend. WinML is only available diff --git a/Cargo.lock b/Cargo.lock index 4f16cb82ae50..fffb7d9ebe56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -129,6 +129,12 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "async-trait" version = "0.1.71" @@ -1826,9 +1832,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libtest-mimic" @@ -1967,6 +1973,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "multi-stash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "685a9ac4b61f4e728e1d2c6a7844609c16527aeb5e6c865915c08e619c16410f" + [[package]] name = "nom" version = "7.1.3" @@ -2067,20 +2079,19 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "openvino" -version = "0.7.2" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aee013796927eec6012a344f10ecdc06bf26de79c626a2395e3f115464907ef6" +checksum = "8f03a664ab0b6917131f5c1a787795fa4d19ad6a334caf9c96284453abdf23fd" dependencies = [ "openvino-finder", "openvino-sys", - "thiserror", ] 
[[package]] name = "openvino-finder" -version = "0.7.2" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af4c6841df4cd60fef743015f3348f81b6b225bd255ed0c4cab6e8c479e45eaa" +checksum = "34d6bbb3e00d9ad3cd60bca1341665a9cfb2b6764df37c58d921627368ae32fc" dependencies = [ "cfg-if", "log", @@ -2088,11 +2099,11 @@ dependencies = [ [[package]] name = "openvino-sys" -version = "0.7.2" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f62fc2bd6882f2300a6b5017eaad292586d70995d333582aabcf1f1121cd147c" +checksum = "04315994236727c3573f7e8d8bf857e93ff373ee2e063f08aa78aceac58e3bc5" dependencies = [ - "env_logger 0.10.0", + "env_logger 0.11.5", "libloading", "once_cell", "openvino-finder", @@ -2689,9 +2700,9 @@ checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" [[package]] name = "smallvec" -version = "1.11.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" dependencies = [ "serde", ] @@ -2748,6 +2759,17 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "string-interner" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c6a0d765f5807e98a091107bae0a56ea3799f66a5de47b2c84c94a39c09974e" +dependencies = [ + "cfg-if", + "hashbrown 0.14.3", + "serde", +] + [[package]] name = "strsim" version = "0.11.1" @@ -3503,28 +3525,36 @@ dependencies = [ [[package]] name = "wasmi" -version = "0.31.1" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acfc1e384a36ca532d070a315925887247f3c7e23567e23e0ac9b1c5d6b8bf76" +checksum = 
"b07e84e3bcdab2f4301827623260ada2557596ca462f7470b60f5182a25270b1" dependencies = [ + "arrayvec", + "multi-stash", "smallvec", "spin", - "wasmi_arena", + "wasmi_collections", "wasmi_core", + "wasmi_ir", "wasmparser-nostd", ] [[package]] -name = "wasmi_arena" -version = "0.4.1" +name = "wasmi_collections" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "104a7f73be44570cac297b3035d76b169d6599637631cf37a1703326a0727073" +checksum = "0d0fd5f4f2c4fe0c98554bb7293108ed2b1d0c124dce0974f999de7d517d37bc" +dependencies = [ + "ahash", + "hashbrown 0.14.3", + "string-interner", +] [[package]] name = "wasmi_core" -version = "0.13.0" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf1a7db34bff95b85c261002720c00c3a6168256dcb93041d3fa2054d19856a" +checksum = "76a5f7bbd933a0fb3bac6c541f8bd90c0c8adcd91bb3ac088a2088995325b3d9" dependencies = [ "downcast-rs", "libm", @@ -3532,6 +3562,15 @@ dependencies = [ "paste", ] +[[package]] +name = "wasmi_ir" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3345445247388df2b5b35250a30c9209c27c8d2c6db1bf4c89b65636264bf9" +dependencies = [ + "wasmi_core", +] + [[package]] name = "wasmparser" version = "0.219.0" @@ -3559,9 +3598,9 @@ dependencies = [ [[package]] name = "wasmparser-nostd" -version = "0.100.1" +version = "0.100.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9157cab83003221bfd385833ab587a039f5d6fa7304854042ba358a3b09e0724" +checksum = "d5a015fe95f3504a94bb1462c717aae75253e39b9dd6c3fb1062c934535c64aa" dependencies = [ "indexmap-nostd", ] diff --git a/cranelift/codegen/meta/src/isa/x86.rs b/cranelift/codegen/meta/src/isa/x86.rs index 18f80b067394..053d15c8f357 100644 --- a/cranelift/codegen/meta/src/isa/x86.rs +++ b/cranelift/codegen/meta/src/isa/x86.rs @@ -17,6 +17,12 @@ pub(crate) fn define() -> TargetIsa { "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false, ); 
+ let has_cmpxchg16b = settings.add_bool( + "has_cmpxchg16b", + "Has support for CMPXCHG16b.", + "CMPXCHG16b: CPUID.01H:ECX.CMPXCHG16B[bit 13]", + false, + ); let has_sse41 = settings.add_bool( "has_sse41", "Has support for SSE4.1.", @@ -106,6 +112,7 @@ pub(crate) fn define() -> TargetIsa { false, ); + settings.add_predicate("use_cmpxchg16b", predicate!(has_cmpxchg16b)); settings.add_predicate("use_ssse3", predicate!(has_ssse3)); settings.add_predicate("use_sse41", predicate!(has_sse41)); settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42)); @@ -141,14 +148,30 @@ pub(crate) fn define() -> TargetIsa { // Intel CPUs // Netburst - settings.add_preset("nocona", "Nocona microarchitecture.", preset!(sse3)); + settings.add_preset( + "nocona", + "Nocona microarchitecture.", + preset!(sse3 && has_cmpxchg16b), + ); // Intel Core 2 Solo/Duo - settings.add_preset("core2", "Core 2 microarchitecture.", preset!(sse3)); - settings.add_preset("penryn", "Penryn microarchitecture.", preset!(sse41)); + settings.add_preset( + "core2", + "Core 2 microarchitecture.", + preset!(sse3 && has_cmpxchg16b), + ); + settings.add_preset( + "penryn", + "Penryn microarchitecture.", + preset!(sse41 && has_cmpxchg16b), + ); // Intel Atom CPUs - let atom = settings.add_preset("atom", "Atom microarchitecture.", preset!(ssse3)); + let atom = settings.add_preset( + "atom", + "Atom microarchitecture.", + preset!(ssse3 && has_cmpxchg16b), + ); settings.add_preset("bonnell", "Bonnell microarchitecture.", preset!(atom)); let silvermont = settings.add_preset( "silvermont", @@ -186,7 +209,7 @@ pub(crate) fn define() -> TargetIsa { let nehalem = settings.add_preset( "nehalem", "Nehalem microarchitecture.", - preset!(sse42 && has_popcnt), + preset!(sse42 && has_popcnt && has_cmpxchg16b), ); settings.add_preset("corei7", "Core i7 microarchitecture.", preset!(nehalem)); let westmere = settings.add_preset("westmere", "Westmere microarchitecture.", preset!(nehalem)); @@ -229,7 +252,15 @@ 
pub(crate) fn define() -> TargetIsa { let knights_landing = settings.add_preset( "knl", "Knights Landing microarchitecture.", - preset!(has_popcnt && has_avx512f && has_fma && has_bmi1 && has_bmi2 && has_lzcnt), + preset!( + has_popcnt + && has_avx512f + && has_fma + && has_bmi1 + && has_bmi2 + && has_lzcnt + && has_cmpxchg16b + ), ); settings.add_preset( "knm", @@ -312,22 +343,22 @@ pub(crate) fn define() -> TargetIsa { settings.add_preset( "opteron-sse3", "Opteron microarchitecture with support for SSE3 instructions.", - preset!(sse3), + preset!(sse3 && has_cmpxchg16b), ); settings.add_preset( "k8-sse3", "K8 Hammer microarchitecture with support for SSE3 instructions.", - preset!(sse3), + preset!(sse3 && has_cmpxchg16b), ); settings.add_preset( "athlon64-sse3", "Athlon 64 microarchitecture with support for SSE3 instructions.", - preset!(sse3), + preset!(sse3 && has_cmpxchg16b), ); let barcelona = settings.add_preset( "barcelona", "Barcelona microarchitecture.", - preset!(has_popcnt && has_lzcnt), + preset!(has_popcnt && has_lzcnt && has_cmpxchg16b), ); settings.add_preset( "amdfam10", @@ -338,7 +369,7 @@ pub(crate) fn define() -> TargetIsa { let btver1 = settings.add_preset( "btver1", "Bobcat microarchitecture.", - preset!(ssse3 && has_lzcnt && has_popcnt), + preset!(ssse3 && has_lzcnt && has_popcnt && has_cmpxchg16b), ); settings.add_preset( "btver2", @@ -349,7 +380,7 @@ pub(crate) fn define() -> TargetIsa { let bdver1 = settings.add_preset( "bdver1", "Bulldozer microarchitecture", - preset!(has_lzcnt && has_popcnt && ssse3), + preset!(has_lzcnt && has_popcnt && ssse3 && has_cmpxchg16b), ); let bdver2 = settings.add_preset( "bdver2", @@ -366,7 +397,9 @@ pub(crate) fn define() -> TargetIsa { let znver1 = settings.add_preset( "znver1", "Zen (first generation) microarchitecture.", - preset!(sse42 && has_popcnt && has_bmi1 && has_bmi2 && has_lzcnt && has_fma), + preset!( + sse42 && has_popcnt && has_bmi1 && has_bmi2 && has_lzcnt && has_fma && has_cmpxchg16b + ), ); 
let znver2 = settings.add_preset( "znver2", @@ -397,7 +430,7 @@ pub(crate) fn define() -> TargetIsa { let x86_64_v2 = settings.add_preset( "x86-64-v2", "Generic x86-64 (V2) microarchitecture.", - preset!(sse42 && has_popcnt), + preset!(sse42 && has_popcnt && has_cmpxchg16b), ); let x86_64_v3 = settings.add_preset( "x84_64_v3", diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 8030bed91e37..018b9807d4a0 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -3663,7 +3663,7 @@ pub(crate) fn define( let AtomicMem = &TypeVar::new( "AtomicMem", "Any type that can be stored in memory, which can be used in an atomic operation", - TypeSetBuilder::new().ints(8..64).build(), + TypeSetBuilder::new().ints(8..128).build(), ); ig.push( @@ -3671,10 +3671,11 @@ pub(crate) fn define( "atomic_rmw", r#" Atomically read-modify-write memory at `p`, with second operand `x`. The old value is - returned. `p` has the type of the target word size, and `x` may be an integer type of - 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned value is the - same as the type of `x`. This operation is sequentially consistent and creates - happens-before edges that order normal (non-atomic) loads and stores. + returned. `p` has the type of the target word size, and `x` may be any integer type; note + that some targets require specific target features to be enabled in order to support 128-bit + integer atomics. The type of the returned value is the same as the type of `x`. This + operation is sequentially consistent and creates happens-before edges that order normal + (non-atomic) loads and stores. "#, &formats.atomic_rmw, ) @@ -3699,11 +3700,11 @@ pub(crate) fn define( Perform an atomic compare-and-swap operation on memory at `p`, with expected value `e`, storing `x` if the value at `p` equals `e`. 
The old value at `p` is returned, regardless of whether the operation succeeds or fails. `p` has the type of the target - word size, and `x` and `e` must have the same type and the same size, which may be an - integer type of 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned - value is the same as the type of `x` and `e`. This operation is sequentially - consistent and creates happens-before edges that order normal (non-atomic) loads and - stores. + word size, and `x` and `e` must have the same type and the same size, which may be any + integer type; note that some targets require specific target features to be enabled in order + to support 128-bit integer atomics. The type of the returned value is the same as the type + of `x` and `e`. This operation is sequentially consistent and creates happens-before edges + that order normal (non-atomic) loads and stores. "#, &formats.atomic_cas, ) @@ -3728,9 +3729,10 @@ pub(crate) fn define( Atomically load from memory at `p`. This is a polymorphic instruction that can load any value type which has a memory - representation. It should only be used for integer types with 8, 16, 32 or 64 bits. - This operation is sequentially consistent and creates happens-before edges that order - normal (non-atomic) loads and stores. + representation. It can only be used for integer types; note that some targets require + specific target features to be enabled in order to support 128-bit integer atomics. This + operation is sequentially consistent and creates happens-before edges that order normal + (non-atomic) loads and stores. "#, &formats.load_no_offset, ) @@ -3752,9 +3754,10 @@ pub(crate) fn define( Atomically store `x` to memory at `p`. This is a polymorphic instruction that can store any value type with a memory - representation. It should only be used for integer types with 8, 16, 32 or 64 bits. 
- This operation is sequentially consistent and creates happens-before edges that order - normal (non-atomic) loads and stores. + representation. It can only be used for integer types; note that some targets require + specific target features to be enabled in order to support 128-bit integer atomics. This + operation is sequentially consistent and creates happens-before edges that order normal + (non-atomic) loads and stores. "#, &formats.store_no_offset, ) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 0e144560798b..3d7a72462048 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -664,6 +664,24 @@ (mem SyntheticAmode) (dst_old WritableReg)) + ;; A standard (native) `lock cmpxchg16b (amode)`, with register + ;; conventions: + ;; + ;; `mem` (read) address + ;; %rbx (low), %rcx (high) (read) replacement value + ;; %rax (low), %rdx (high) (modified) in: expected value, out: value that was actually at `mem` + ;; %rflags is written. Do not assume anything about it after the instruction. + ;; + ;; The instruction "succeeded" iff the bits of %rax and %rdx + ;; afterwards are the same as they were before. + (LockCmpxchg16b (replacement_low Reg) + (replacement_high Reg) + (expected_low Reg) + (expected_high Reg) + (mem BoxSyntheticAmode) + (dst_old_low WritableReg) + (dst_old_high WritableReg)) + ;; A synthetic instruction, based on a loop around a native `lock ;; cmpxchg` instruction. ;; @@ -696,6 +714,46 @@ (temp WritableReg) (dst_old WritableReg)) + ;; A synthetic instruction, based on a loop around a native `lock + ;; cmpxchg16b` instruction. + ;; + ;; This is the same as `AtomicRmwSeq`, but for 128-bit integers. + ;; + ;; For `MachAtomicRmwOp::Xchg`, use `Atomic128XchgSeq` instead.
+ ;; + ;; This instruction sequence has fixed register uses as follows: + ;; - %rax (low), %rdx (high) (written) the old value at `mem` + ;; - %rbx (low), %rcx (high) (written) used as temp registers to hold + ;; the replacement value + ;; - %rflags is written. Do not assume anything about it after the + ;; instruction. + (Atomic128RmwSeq (op MachAtomicRmwOp) + (mem BoxSyntheticAmode) + (operand_low Reg) + (operand_high Reg) + (temp_low WritableReg) + (temp_high WritableReg) + (dst_old_low WritableReg) + (dst_old_high WritableReg)) + + ;; A synthetic instruction, based on a loop around a native `lock + ;; cmpxchg16b` instruction. + ;; + ;; This is `Atomic128RmwSeq` but only for `MachAtomicRmwOp::Xchg`. As + ;; the replacement value is the same every time, this instruction doesn't + ;; require any temporary registers. + ;; + ;; This instruction sequence has fixed register uses as follows: + ;; - %rax (low), %rdx (high) (written) the old value at `mem` + ;; - %rbx (low), %rcx (high) (read) the replacement value + ;; - %rflags is written. Do not assume anything about it after the + ;; instruction. + (Atomic128XchgSeq (mem SyntheticAmode) + (operand_low Reg) + (operand_high Reg) + (dst_old_low WritableReg) + (dst_old_high WritableReg)) + ;; A memory fence (mfence, lfence or sfence). (Fence (kind FenceKind)) @@ -765,6 +823,11 @@ (type BoxCallIndInfo extern (enum)) (type BoxReturnCallInfo extern (enum)) (type BoxReturnCallIndInfo extern (enum)) +(type BoxSyntheticAmode extern (enum)) + +(decl pure box_synthetic_amode (SyntheticAmode) BoxSyntheticAmode) +(extern constructor box_synthetic_amode box_synthetic_amode) +(convert SyntheticAmode BoxSyntheticAmode box_synthetic_amode) ;; Get the `OperandSize` for a given `Type`, rounding smaller types up to 32 bits.
(decl operand_size_of_type_32_64 (Type) OperandSize) @@ -1865,6 +1928,9 @@ (decl pure use_avx2 () bool) (extern constructor use_avx2 use_avx2) +(decl pure use_cmpxchg16b () bool) +(extern constructor use_cmpxchg16b use_cmpxchg16b) + ;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;; ;; Extract a constant `Imm8Reg.Imm8` from a value operand. @@ -5217,6 +5283,17 @@ (_ Unit (emit (MInst.LockCmpxchg ty replacement expected addr dst)))) dst)) +(decl x64_cmpxchg16b (ValueRegs ValueRegs SyntheticAmode) ValueRegs) +(rule (x64_cmpxchg16b expected replacement addr) + (let ((expected_low Gpr (value_regs_get_gpr expected 0)) + (expected_high Gpr (value_regs_get_gpr expected 1)) + (replacement_low Gpr (value_regs_get_gpr replacement 0)) + (replacement_high Gpr (value_regs_get_gpr replacement 1)) + (dst_low WritableGpr (temp_writable_gpr)) + (dst_high WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.LockCmpxchg16b replacement_low replacement_high expected_low expected_high addr dst_low dst_high)))) + (value_regs dst_low dst_high))) + (decl x64_atomic_rmw_seq (Type MachAtomicRmwOp SyntheticAmode Gpr) Gpr) (rule (x64_atomic_rmw_seq ty op mem input) (let ((dst WritableGpr (temp_writable_gpr)) @@ -5224,6 +5301,36 @@ (_ Unit (emit (MInst.AtomicRmwSeq ty op mem input tmp dst)))) dst)) +(decl x64_atomic_128_rmw_seq (MachAtomicRmwOp SyntheticAmode ValueRegs) ValueRegs) +(rule (x64_atomic_128_rmw_seq op mem input) + (let ((dst_low WritableGpr (temp_writable_gpr)) + (dst_high WritableGpr (temp_writable_gpr)) + (tmp_low WritableGpr (temp_writable_gpr)) + (tmp_high WritableGpr (temp_writable_gpr)) + (input_low Gpr (value_regs_get_gpr input 0)) + (input_high Gpr (value_regs_get_gpr input 1)) + (_ Unit (emit (MInst.Atomic128RmwSeq op mem input_low input_high tmp_low tmp_high dst_low dst_high)))) + (value_regs dst_low dst_high))) + +(rule 1 (x64_atomic_128_rmw_seq (mach_atomic_rmw_op_xchg) mem input) + (let ((dst_low WritableGpr (temp_writable_gpr)) + 
(dst_high WritableGpr (temp_writable_gpr)) + (input_low Gpr (value_regs_get_gpr input 0)) + (input_high Gpr (value_regs_get_gpr input 1)) + (_ Unit (emit (MInst.Atomic128XchgSeq mem input_low input_high dst_low dst_high)))) + (value_regs dst_low dst_high))) + +(decl x64_atomic_128_store_seq (SyntheticAmode ValueRegs) SideEffectNoResult) +(rule (x64_atomic_128_store_seq mem input) + (let ((dst_low WritableGpr (temp_writable_gpr)) + (dst_high WritableGpr (temp_writable_gpr)) + (input_low Gpr (value_regs_get_gpr input 0)) + (input_high Gpr (value_regs_get_gpr input 1))) + (SideEffectNoResult.Inst (MInst.Atomic128XchgSeq mem input_low input_high dst_low dst_high)))) + +(decl mach_atomic_rmw_op_xchg () MachAtomicRmwOp) +(extern extractor mach_atomic_rmw_op_xchg mach_atomic_rmw_op_is_xchg) + ;; CLIF IR has one enumeration for atomic operations (`AtomicRmwOp`) while the ;; mach backend has another (`MachAtomicRmwOp`)--this converts one to the other. (type MachAtomicRmwOp extern (enum)) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index a6923e400ff8..7cb22c624909 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -959,6 +959,7 @@ pub enum CmpOpcode { pub(crate) enum InstructionSet { SSE, SSE2, + CMPXCHG16b, SSSE3, SSE41, SSE42, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 2bcee5ad15d5..12f698daa941 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -115,6 +115,7 @@ pub(crate) fn emit( match iset_requirement { // Cranelift assumes SSE2 at least. 
InstructionSet::SSE | InstructionSet::SSE2 => true, + InstructionSet::CMPXCHG16b => info.isa_flags.use_cmpxchg16b(), InstructionSet::SSSE3 => info.isa_flags.use_ssse3(), InstructionSet::SSE41 => info.isa_flags.use_sse41(), InstructionSet::SSE42 => info.isa_flags.use_sse42(), @@ -4037,6 +4038,38 @@ pub(crate) fn emit( emit_std_reg_mem(sink, prefix, opcodes, 2, replacement, &amode, rex, 0); } + Inst::LockCmpxchg16b { + replacement_low, + replacement_high, + expected_low, + expected_high, + mem, + dst_old_low, + dst_old_high, + } => { + let mem = mem.clone(); + debug_assert_eq!(*replacement_low, regs::rbx()); + debug_assert_eq!(*replacement_high, regs::rcx()); + debug_assert_eq!(*expected_low, regs::rax()); + debug_assert_eq!(*expected_high, regs::rdx()); + debug_assert_eq!(dst_old_low.to_reg(), regs::rax()); + debug_assert_eq!(dst_old_high.to_reg(), regs::rdx()); + + let amode = mem.finalize(state, sink); + // lock cmpxchg16b (mem) + // Note that 0xF0 is the Lock prefix. + emit_std_enc_mem( + sink, + LegacyPrefixes::_F0, + 0x0FC7, + 2, + 1, + &amode, + RexFlags::set_w(), + 0, + ); + } + Inst::AtomicRmwSeq { ty, op, @@ -4157,6 +4190,182 @@ pub(crate) fn emit( one_way_jmp(sink, CC::NZ, again_label); } + Inst::Atomic128RmwSeq { + op, + mem, + operand_low, + operand_high, + temp_low, + temp_high, + dst_old_low, + dst_old_high, + } => { + let operand_low = *operand_low; + let operand_high = *operand_high; + let temp_low = *temp_low; + let temp_high = *temp_high; + let dst_old_low = *dst_old_low; + let dst_old_high = *dst_old_high; + debug_assert_eq!(temp_low.to_reg(), regs::rbx()); + debug_assert_eq!(temp_high.to_reg(), regs::rcx()); + debug_assert_eq!(dst_old_low.to_reg(), regs::rax()); + debug_assert_eq!(dst_old_high.to_reg(), regs::rdx()); + let mem = mem.finalize(state, sink).clone(); + + let again_label = sink.get_label(); + + // Load the initial value. 
+ Inst::load(types::I64, mem.clone(), dst_old_low, ExtKind::ZeroExtend) + .emit(sink, info, state); + Inst::load(types::I64, mem.offset(8), dst_old_high, ExtKind::ZeroExtend) + .emit(sink, info, state); + + // again: + sink.bind_label(again_label, state.ctrl_plane_mut()); + + // Move old value to temp registers. + Inst::mov_r_r(OperandSize::Size64, dst_old_low.to_reg(), temp_low) + .emit(sink, info, state); + Inst::mov_r_r(OperandSize::Size64, dst_old_high.to_reg(), temp_high) + .emit(sink, info, state); + + // Perform the operation. + let operand_low_rmi = RegMemImm::reg(operand_low); + let operand_high_rmi = RegMemImm::reg(operand_high); + use inst_common::MachAtomicRmwOp as RmwOp; + match op { + RmwOp::Xchg => panic!("use `Atomic128XchgSeq` instead"), + RmwOp::Nand => { + // temp &= operand + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::And, + operand_low_rmi, + temp_low, + ) + .emit(sink, info, state); + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::And, + operand_high_rmi, + temp_high, + ) + .emit(sink, info, state); + + // temp = !temp + Inst::not(OperandSize::Size64, temp_low).emit(sink, info, state); + Inst::not(OperandSize::Size64, temp_high).emit(sink, info, state); + } + RmwOp::Umin | RmwOp::Umax | RmwOp::Smin | RmwOp::Smax => { + // Do a comparison with LHS temp and RHS operand. + // `cmp_rmi_r` and `alu_rmi_r` have opposite argument orders. 
+ Inst::cmp_rmi_r(OperandSize::Size64, temp_low.to_reg(), operand_low_rmi) + .emit(sink, info, state); + // This will clobber `temp_high` + Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Sbb, + operand_high_rmi, + temp_high, + ) + .emit(sink, info, state); + // Restore the clobbered value + Inst::mov_r_r(OperandSize::Size64, dst_old_high.to_reg(), temp_high) + .emit(sink, info, state); + let cc = match op { + RmwOp::Umin => CC::NB, + RmwOp::Umax => CC::B, + RmwOp::Smin => CC::NL, + RmwOp::Smax => CC::L, + _ => unreachable!(), + }; + Inst::cmove(OperandSize::Size64, cc, operand_low.into(), temp_low) + .emit(sink, info, state); + Inst::cmove(OperandSize::Size64, cc, operand_high.into(), temp_high) + .emit(sink, info, state); + } + _ => { + // temp op= operand + let (op_low, op_high) = match op { + RmwOp::Add => (AluRmiROpcode::Add, AluRmiROpcode::Adc), + RmwOp::Sub => (AluRmiROpcode::Sub, AluRmiROpcode::Sbb), + RmwOp::And => (AluRmiROpcode::And, AluRmiROpcode::And), + RmwOp::Or => (AluRmiROpcode::Or, AluRmiROpcode::Or), + RmwOp::Xor => (AluRmiROpcode::Xor, AluRmiROpcode::Xor), + RmwOp::Xchg + | RmwOp::Nand + | RmwOp::Umin + | RmwOp::Umax + | RmwOp::Smin + | RmwOp::Smax => unreachable!(), + }; + Inst::alu_rmi_r(OperandSize::Size64, op_low, operand_low_rmi, temp_low) + .emit(sink, info, state); + Inst::alu_rmi_r(OperandSize::Size64, op_high, operand_high_rmi, temp_high) + .emit(sink, info, state); + } + } + + // cmpxchg16b (mem) + Inst::LockCmpxchg16b { + replacement_low: temp_low.to_reg(), + replacement_high: temp_high.to_reg(), + expected_low: dst_old_low.to_reg(), + expected_high: dst_old_high.to_reg(), + mem: Box::new(mem.into()), + dst_old_low, + dst_old_high, + } + .emit(sink, info, state); + + // jnz again + one_way_jmp(sink, CC::NZ, again_label); + } + + Inst::Atomic128XchgSeq { + mem, + operand_low, + operand_high, + dst_old_low, + dst_old_high, + } => { + let operand_low = *operand_low; + let operand_high = *operand_high; + let dst_old_low = 
*dst_old_low; + let dst_old_high = *dst_old_high; + debug_assert_eq!(operand_low, regs::rbx()); + debug_assert_eq!(operand_high, regs::rcx()); + debug_assert_eq!(dst_old_low.to_reg(), regs::rax()); + debug_assert_eq!(dst_old_high.to_reg(), regs::rdx()); + let mem = mem.finalize(state, sink).clone(); + + let again_label = sink.get_label(); + + // Load the initial value. + Inst::load(types::I64, mem.clone(), dst_old_low, ExtKind::ZeroExtend) + .emit(sink, info, state); + Inst::load(types::I64, mem.offset(8), dst_old_high, ExtKind::ZeroExtend) + .emit(sink, info, state); + + // again: + sink.bind_label(again_label, state.ctrl_plane_mut()); + + // cmpxchg16b (mem) + Inst::LockCmpxchg16b { + replacement_low: operand_low, + replacement_high: operand_high, + expected_low: dst_old_low.to_reg(), + expected_high: dst_old_high.to_reg(), + mem: Box::new(mem.into()), + dst_old_low, + dst_old_high, + } + .emit(sink, info, state); + + // jnz again + one_way_jmp(sink, CC::NZ, again_label); + } + Inst::Fence { kind } => { sink.put1(0x0F); sink.put1(0xAE); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 287cc3bf4cd4..1b52d074923a 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -4984,6 +4984,20 @@ fn test_x64_emit() { "lock cmpxchgq %r10, -12345(%rcx,%rsi,8), expected=%rax, dst_old=%rax", )); + insns.push(( + Inst::LockCmpxchg16b { + mem: Box::new(am2.clone()), + replacement_low: rbx, + replacement_high: rcx, + expected_low: rax, + expected_high: rdx, + dst_old_low: w_rax, + dst_old_high: w_rdx, + }, + "F0480FC78CF1C7CFFFFF", + "lock cmpxchg16b -12345(%rcx,%rsi,8), replacement=%rcx:%rbx, expected=%rdx:%rax, dst_old=%rdx:%rax", + )); + // AtomicRmwSeq insns.push(( Inst::AtomicRmwSeq { @@ -5046,6 +5060,75 @@ fn test_x64_emit() { "atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" )); + // 
Atomic128RmwSeq + insns.push(( + Inst::Atomic128RmwSeq { + op: inst_common::MachAtomicRmwOp::Or, + mem: Box::new(am3.clone()), + operand_low: r10, + operand_high: r11, + temp_low: w_rbx, + temp_high: w_rcx, + dst_old_low: w_rax, + dst_old_high: w_rdx, + }, + "498B01498B51084889C34889D14C09D34C09D9F0490FC7090F85E9FFFFFF", + "atomically { %rdx:%rax = 0(%r9); %rcx:%rbx = %rdx:%rax Or %r11:%r10; 0(%r9) = %rcx:%rbx }", + )); + insns.push(( + Inst::Atomic128RmwSeq { + op: inst_common::MachAtomicRmwOp::And, + mem: Box::new(am3.clone()), + operand_low: r10, + operand_high: r11, + temp_low: w_rbx, + temp_high: w_rcx, + dst_old_low: w_rax, + dst_old_high: w_rdx, + }, + "498B01498B51084889C34889D14C21D34C21D9F0490FC7090F85E9FFFFFF", + "atomically { %rdx:%rax = 0(%r9); %rcx:%rbx = %rdx:%rax And %r11:%r10; 0(%r9) = %rcx:%rbx }" + )); + insns.push(( + Inst::Atomic128RmwSeq { + op: inst_common::MachAtomicRmwOp::Umin, + mem: Box::new(am3.clone()), + operand_low: r10, + operand_high: r11, + temp_low: w_rbx, + temp_high: w_rcx, + dst_old_low: w_rax, + dst_old_high: w_rdx, + }, + "498B01498B51084889C34889D14C39D34C19D94889D1490F43DA490F43CBF0490FC7090F85DEFFFFFF", + "atomically { %rdx:%rax = 0(%r9); %rcx:%rbx = %rdx:%rax Umin %r11:%r10; 0(%r9) = %rcx:%rbx }" + )); + insns.push(( + Inst::Atomic128RmwSeq { + op: inst_common::MachAtomicRmwOp::Add, + mem: Box::new(am3.clone()), + operand_low: r10, + operand_high: r11, + temp_low: w_rbx, + temp_high: w_rcx, + dst_old_low: w_rax, + dst_old_high: w_rdx, + }, + "498B01498B51084889C34889D14C01D34C11D9F0490FC7090F85E9FFFFFF", + "atomically { %rdx:%rax = 0(%r9); %rcx:%rbx = %rdx:%rax Add %r11:%r10; 0(%r9) = %rcx:%rbx }" + )); + insns.push(( + Inst::Atomic128XchgSeq { + mem: am3.clone(), + operand_low: rbx, + operand_high: rcx, + dst_old_low: w_rax, + dst_old_high: w_rdx, + }, + "498B01498B5108F0490FC7090F85F5FFFFFF", + "atomically { %rdx:%rax = 0(%r9); 0(%r9) = %rcx:%rbx }", + )); + // Fence insns.push(( Inst::Fence { @@ -5115,6 +5198,7 @@ fn 
test_x64_emit() { use crate::settings::Configurable; let mut isa_flag_builder = x64::settings::builder(); + isa_flag_builder.enable("has_cmpxchg16b").unwrap(); isa_flag_builder.enable("has_ssse3").unwrap(); isa_flag_builder.enable("has_sse41").unwrap(); isa_flag_builder.enable("has_fma").unwrap(); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index a999c938d871..3e8aea72f5e5 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -140,6 +140,10 @@ impl Inst { | Inst::GetRip { .. } | Inst::AluConstOp { .. } => smallvec![], + Inst::LockCmpxchg16b { .. } + | Inst::Atomic128RmwSeq { .. } + | Inst::Atomic128XchgSeq { .. } => smallvec![InstructionSet::CMPXCHG16b], + Inst::AluRmRVex { op, .. } => op.available_from(), Inst::UnaryRmR { op, .. } => op.available_from(), Inst::UnaryRmRVex { op, .. } => op.available_from(), @@ -1816,6 +1820,28 @@ impl PrettyPrint for Inst { ) } + Inst::LockCmpxchg16b { + replacement_low, + replacement_high, + expected_low, + expected_high, + mem, + dst_old_low, + dst_old_high, + .. + } => { + let replacement_low = pretty_print_reg(*replacement_low, 8); + let replacement_high = pretty_print_reg(*replacement_high, 8); + let expected_low = pretty_print_reg(*expected_low, 8); + let expected_high = pretty_print_reg(*expected_high, 8); + let dst_old_low = pretty_print_reg(dst_old_low.to_reg(), 8); + let dst_old_high = pretty_print_reg(dst_old_high.to_reg(), 8); + let mem = mem.pretty_print(16); + format!( + "lock cmpxchg16b {mem}, replacement={replacement_high}:{replacement_low}, expected={expected_high}:{expected_low}, dst_old={dst_old_high}:{dst_old_low}" + ) + } + Inst::AtomicRmwSeq { ty, op, .. 
} => { let ty = ty.bits(); format!( @@ -1823,6 +1849,41 @@ impl PrettyPrint for Inst { ) } + Inst::Atomic128RmwSeq { + op, + mem, + operand_low, + operand_high, + temp_low, + temp_high, + dst_old_low, + dst_old_high, + } => { + let operand_low = pretty_print_reg(*operand_low, 8); + let operand_high = pretty_print_reg(*operand_high, 8); + let temp_low = pretty_print_reg(temp_low.to_reg(), 8); + let temp_high = pretty_print_reg(temp_high.to_reg(), 8); + let dst_old_low = pretty_print_reg(dst_old_low.to_reg(), 8); + let dst_old_high = pretty_print_reg(dst_old_high.to_reg(), 8); + let mem = mem.pretty_print(16); + format!("atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}") + } + + Inst::Atomic128XchgSeq { + mem, + operand_low, + operand_high, + dst_old_low, + dst_old_high, + } => { + let operand_low = pretty_print_reg(*operand_low, 8); + let operand_high = pretty_print_reg(*operand_high, 8); + let dst_old_low = pretty_print_reg(dst_old_low.to_reg(), 8); + let dst_old_high = pretty_print_reg(dst_old_high.to_reg(), 8); + let mem = mem.pretty_print(16); + format!("atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}") + } + Inst::Fence { kind } => match kind { FenceKind::MFence => "mfence".to_string(), FenceKind::LFence => "lfence".to_string(), @@ -2467,6 +2528,25 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { mem.get_operands(collector); } + Inst::LockCmpxchg16b { + replacement_low, + replacement_high, + expected_low, + expected_high, + mem, + dst_old_low, + dst_old_high, + .. 
+ } => { + collector.reg_fixed_use(replacement_low, regs::rbx()); + collector.reg_fixed_use(replacement_high, regs::rcx()); + collector.reg_fixed_use(expected_low, regs::rax()); + collector.reg_fixed_use(expected_high, regs::rdx()); + collector.reg_fixed_def(dst_old_low, regs::rax()); + collector.reg_fixed_def(dst_old_high, regs::rdx()); + mem.get_operands(collector); + } + Inst::AtomicRmwSeq { operand, temp, @@ -2482,6 +2562,42 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { mem.get_operands_late(collector) } + Inst::Atomic128RmwSeq { + operand_low, + operand_high, + temp_low, + temp_high, + dst_old_low, + dst_old_high, + mem, + .. + } => { + // All registers are collected in the `Late` position so that they don't overlap. + collector.reg_late_use(operand_low); + collector.reg_late_use(operand_high); + collector.reg_fixed_def(temp_low, regs::rbx()); + collector.reg_fixed_def(temp_high, regs::rcx()); + collector.reg_fixed_def(dst_old_low, regs::rax()); + collector.reg_fixed_def(dst_old_high, regs::rdx()); + mem.get_operands_late(collector) + } + + Inst::Atomic128XchgSeq { + operand_low, + operand_high, + dst_old_low, + dst_old_high, + mem, + .. + } => { + // All registers are collected in the `Late` position so that they don't overlap. 
+ collector.reg_fixed_late_use(operand_low, regs::rbx()); + collector.reg_fixed_late_use(operand_high, regs::rcx()); + collector.reg_fixed_def(dst_old_low, regs::rax()); + collector.reg_fixed_def(dst_old_high, regs::rdx()); + mem.get_operands_late(collector) + } + Inst::Args { args } => { for ArgPair { vreg, preg } in args { collector.reg_fixed_def(vreg, *preg); diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index ea648e9f752c..cb15cb725c90 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3348,31 +3348,40 @@ ;; sequencing to satisfy the CLIF synchronisation requirements for `AtomicLoad` ;; without the need for any fence instructions. ;; -;; As described in the `atomic_load` documentation, this lowering is only valid -;; for I8, I16, I32, and I64. The sub-64-bit types are zero extended, as with a -;; normal load. +;; This lowering is only valid for I8, I16, I32, and I64. The sub-64-bit types +;; are zero extended, as with a normal load. (rule 1 (lower (has_type $I64 (atomic_load flags address))) (x64_mov (to_amode flags address (zero_offset)))) (rule (lower (has_type (and (fits_in_32 ty) (ty_int _)) (atomic_load flags address))) (x64_movzx (ext_mode (ty_bits_u16 ty) 64) (to_amode flags address (zero_offset)))) +;; Lower 128-bit `atomic_load` using `cmpxchg16b`. +(rule 1 (lower (has_type $I128 (atomic_load flags address))) + (if-let $true (use_cmpxchg16b)) + (x64_cmpxchg16b (value_regs (imm $I64 0) (imm $I64 0)) (value_regs (imm $I64 0) (imm $I64 0)) (to_amode flags address (zero_offset)))) ;; Rules for `atomic_store` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; This is a normal store followed by an `mfence` instruction. As described in -;; the `atomic_load` documentation, this lowering is only valid for I8, I16, -;; I32, and I64. +;; This is a normal store followed by an `mfence` instruction. This lowering is +;; only valid for I8, I16, I32, and I64. 
(rule (lower (atomic_store flags value @ (value_type (and (fits_in_64 ty) (ty_int _))) address)) (side_effect (side_effect_concat (x64_movrm ty (to_amode flags address (zero_offset)) value) (x64_mfence)))) +;; Lower 128-bit `atomic_store` using `cmpxchg16b`. +(rule 1 (lower (atomic_store flags value @ (value_type $I128) address)) + (if-let $true (use_cmpxchg16b)) + (side_effect (x64_atomic_128_store_seq (to_amode flags address (zero_offset)) value))) ;; Rules for `atomic_cas` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (and (fits_in_64 ty) (ty_int _)) (atomic_cas flags address expected replacement))) (x64_cmpxchg ty expected replacement (to_amode flags address (zero_offset)))) +(rule 1 (lower (has_type $I128 (atomic_cas flags address expected replacement))) + (if-let $true (use_cmpxchg16b)) + (x64_cmpxchg16b expected replacement (to_amode flags address (zero_offset)))) ;; Rules for `atomic_rmw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -3389,6 +3398,9 @@ (rule (lower (has_type (and (fits_in_64 ty) (ty_int _)) (atomic_rmw flags op address input))) (x64_atomic_rmw_seq ty op (to_amode flags address (zero_offset)) input)) +(rule 1 (lower (has_type $I128 (atomic_rmw flags op address input))) + (if-let $true (use_cmpxchg16b)) + (x64_atomic_128_rmw_seq op (to_amode flags address (zero_offset)) input)) ;; Rules for `call` and `call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 6b6473c0704a..ed61a4d92654 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -39,6 +39,7 @@ type BoxCallIndInfo = Box>; type BoxReturnCallInfo = Box>; type BoxReturnCallIndInfo = Box>; type VecArgPair = Vec; +type BoxSyntheticAmode = Box; pub struct SinkableLoad { inst: Inst, @@ -240,6 +241,11 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { 
self.backend.x64_flags.use_sse42() } + #[inline] + fn use_cmpxchg16b(&mut self) -> bool { + self.backend.x64_flags.use_cmpxchg16b() + } + #[inline] fn imm8_from_value(&mut self, val: Value) -> Option { let inst = self.lower_ctx.dfg().value_def(val).inst()?; @@ -614,6 +620,15 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { MachAtomicRmwOp::from(*op) } + #[inline] + fn mach_atomic_rmw_op_is_xchg(&mut self, op: &MachAtomicRmwOp) -> Option<()> { + if *op == MachAtomicRmwOp::Xchg { + Some(()) + } else { + None + } + } + #[inline] fn preg_rbp(&mut self) -> PReg { regs::rbp().to_real_reg().unwrap().into() @@ -939,6 +954,10 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { let reg = Gpr::new(self.invalid_reg()).unwrap(); WritableGpr::from_reg(reg) } + + fn box_synthetic_amode(&mut self, amode: &SyntheticAmode) -> BoxSyntheticAmode { + Box::new(amode.clone()) + } } impl IsleContext<'_, '_, MInst, X64Backend> { diff --git a/cranelift/codegen/src/isa/x64/pcc.rs b/cranelift/codegen/src/isa/x64/pcc.rs index 51c599799a6e..e29908429815 100644 --- a/cranelift/codegen/src/isa/x64/pcc.rs +++ b/cranelift/codegen/src/isa/x64/pcc.rs @@ -886,6 +886,18 @@ pub(crate) fn check( Ok(()) } + Inst::LockCmpxchg16b { + ref mem, + dst_old_low, + dst_old_high, + .. + } => { + ensure_no_fact(vcode, dst_old_low.to_reg())?; + ensure_no_fact(vcode, dst_old_high.to_reg())?; + check_store(ctx, None, mem, vcode, I128)?; + Ok(()) + } + Inst::AtomicRmwSeq { ref mem, temp, @@ -898,6 +910,34 @@ pub(crate) fn check( Ok(()) } + Inst::Atomic128RmwSeq { + ref mem, + temp_low, + temp_high, + dst_old_low, + dst_old_high, + .. + } => { + ensure_no_fact(vcode, dst_old_low.to_reg())?; + ensure_no_fact(vcode, dst_old_high.to_reg())?; + ensure_no_fact(vcode, temp_low.to_reg())?; + ensure_no_fact(vcode, temp_high.to_reg())?; + check_store(ctx, None, mem, vcode, I128)?; + Ok(()) + } + + Inst::Atomic128XchgSeq { + ref mem, + dst_old_low, + dst_old_high, + .. 
+ } => { + ensure_no_fact(vcode, dst_old_low.to_reg())?; + ensure_no_fact(vcode, dst_old_high.to_reg())?; + check_store(ctx, None, mem, vcode, I128)?; + Ok(()) + } + Inst::Fence { .. } => Ok(()), Inst::XmmUninitializedValue { dst } => { diff --git a/cranelift/filetests/filetests/isa/x64/atomic-128.clif b/cranelift/filetests/filetests/isa/x64/atomic-128.clif new file mode 100644 index 000000000000..791578b6753e --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/atomic-128.clif @@ -0,0 +1,600 @@ +test compile precise-output +set enable_llvm_abi_extensions +target x86_64 has_cmpxchg16b + +function %load(i64) -> i128 { +block0(v0: i64): + v1 = atomic_load.i128 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; xorq %rax, %rax, %rax +; xorq %rdx, %rdx, %rdx +; xorq %rbx, %rbx, %rbx +; xorq %rcx, %rcx, %rcx +; lock cmpxchg16b 0(%rdi), replacement=%rcx:%rbx, expected=%rdx:%rax, dst_old=%rdx:%rax +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; xorq %rax, %rax +; xorq %rdx, %rdx +; xorq %rbx, %rbx +; xorq %rcx, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %store(i128, i64) { +block0(v0: i128, v1: i64): + atomic_store.i128 v0, v1 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rsi, %rcx +; movq %rdi, %rbx +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%r11); 0(%r11) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rsi, %rcx +; movq %rdi, %rbx +; movq 
%rdx, %r11 +; movq (%r11), %rax ; trap: heap_oob +; movq 8(%r11), %rdx ; trap: heap_oob +; lock cmpxchg16b (%r11) ; trap: heap_oob +; jne 0x1c +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %cas(i64, i128, i128) -> i128 { +block0(v0: i64, v1: i128, v2: i128): + v3 = atomic_cas.i128 v0, v1, v2 + return v3 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rcx, %rbx +; movq %r8, %rcx +; movq %rsi, %rax +; lock cmpxchg16b 0(%rdi), replacement=%rcx:%rbx, expected=%rdx:%rax, dst_old=%rdx:%rax +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rcx, %rbx +; movq %r8, %rcx +; movq %rsi, %rax +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %add(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 add v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax Add %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; addq %rsi, %rbx +; adcq %r11, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %sub(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 sub v0, v1 + return v2 +} + +; 
VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax Sub %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; subq %rsi, %rbx +; sbbq %r11, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %and(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 and v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax And %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; andq %rsi, %rbx +; andq %r11, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %nand(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 nand v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax Nand %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; 
movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; andq %rsi, %rbx +; andq %r11, %rcx +; notq %rbx +; notq %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %or(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 or v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax Or %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; orq %rsi, %rbx +; orq %r11, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xor(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 xor v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax Xor %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 
8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; xorq %rsi, %rbx +; xorq %r11, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %xchg(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 xchg v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %rcx +; movq %rsi, %rbx +; atomically { %rdx:%rax = 0(%rdi); 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %rcx +; movq %rsi, %rbx +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x19 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umin(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 umin v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax Umin %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; cmpq %rsi, %rbx +; sbbq %r11, %rcx +; movq %rdx, %rcx +; cmovaeq %rsi, %rbx +; cmovaeq %r11, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %umax(i64, i128) 
-> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 umax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax Umax %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; cmpq %rsi, %rbx +; sbbq %r11, %rcx +; movq %rdx, %rcx +; cmovbq %rsi, %rbx +; cmovbq %r11, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smin(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 smin v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax Smin %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; cmpq %rsi, %rbx +; sbbq %r11, %rcx +; movq %rdx, %rcx +; cmovgeq %rsi, %rbx +; cmovgeq %r11, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + +function %smax(i64, i128) -> i128 { +block0(v0: i64, v1: i128): + v2 = atomic_rmw.i128 smax v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +;
subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %r11 +; atomically { %rdx:%rax = 0(%rdi); %rcx:%rbx = %rdx:%rax Smax %r11:%rsi; 0(%rdi) = %rcx:%rbx } +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x10, %rsp +; movq %rbx, (%rsp) +; block1: ; offset 0xc +; movq %rdx, %r11 +; movq (%rdi), %rax ; trap: heap_oob +; movq 8(%rdi), %rdx ; trap: heap_oob +; movq %rax, %rbx +; movq %rdx, %rcx +; cmpq %rsi, %rbx +; sbbq %r11, %rcx +; movq %rdx, %rcx +; cmovlq %rsi, %rbx +; cmovlq %r11, %rcx +; lock cmpxchg16b (%rdi) ; trap: heap_oob +; jne 0x16 +; movq (%rsp), %rbx +; addq $0x10, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/runtests/atomic-128.clif b/cranelift/filetests/filetests/runtests/atomic-128.clif new file mode 100644 index 000000000000..8ffe27ab457a --- /dev/null +++ b/cranelift/filetests/filetests/runtests/atomic-128.clif @@ -0,0 +1,274 @@ +test interpret +test run +set enable_llvm_abi_extensions +target x86_64 has_cmpxchg16b + +function %atomic_load(i128) -> i128 { + ss0 = explicit_slot 16 + +block0(v0: i128): + stack_store.i128 v0, ss0 + v1 = stack_addr.i64 ss0 + v2 = atomic_load.i128 v1 + return v2 +} +; run: %atomic_load(0) == 0 +; run: %atomic_load(-1) == -1 +; run: %atomic_load(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %atomic_load(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0xFFFFFFFF_FFFFFFFF_00000000_00000000 +; run: %atomic_load(0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == 0xFEDCBA98_76543210_F7E6D5C4_B3A29180 +; run: %atomic_load(0xA00A00A0_0A00A00A_00A00A00_A00A00A0) == 0xA00A00A0_0A00A00A_00A00A00_A00A00A0 +; run: %atomic_load(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678) == 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678 + + +function %atomic_store(i128) -> i128 { + ss0 = explicit_slot 16 + +block0(v0: i128): + v1 = stack_addr.i64 ss0 + 
atomic_store.i128 v0, v1 + v2 = stack_load.i128 ss0 + return v2 +} +; run: %atomic_store(0) == 0 +; run: %atomic_store(-1) == -1 +; run: %atomic_store(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %atomic_store(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0xFFFFFFFF_FFFFFFFF_00000000_00000000 +; run: %atomic_store(0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == 0xFEDCBA98_76543210_F7E6D5C4_B3A29180 +; run: %atomic_store(0xA00A00A0_0A00A00A_00A00A00_A00A00A0) == 0xA00A00A0_0A00A00A_00A00A00_A00A00A0 +; run: %atomic_store(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678) == 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678 + + +function %atomic_cas(i128, i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128, v2: i128): + stack_store.i128 v0, ss0 + v3 = stack_addr.i64 ss0 + v4 = atomic_cas.i128 v3, v1, v2 + v5 = stack_load.i128 ss0 + return v5, v4 +} + +; run: %atomic_cas(0, 0, 2) == [2, 0] +; run: %atomic_cas(1, 0, 2) == [1, 1] +; run: %atomic_cas(0, 1, 2) == [0, 0] +; run: %atomic_cas(0, 0xC0FFEEEE_ABCDEF01_00000000_00000000, 0xDECAFFFF_12345678) == [0, 0] +; run: %atomic_cas(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0xFEDCBA98_76543210_F7E6D5C4_B3A29180, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_add(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 add v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_add(0, 0) == [0, 0] +; run: %atomic_add(1, 0) == [1, 1] +; run: %atomic_add(0, 1) == [1, 0] +; run: %atomic_add(1, 1) == [2, 1] +; run: %atomic_add(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xDECAFFFF_12345678] +; run: %atomic_add(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == 
[0xBFDCA987_22222112_D6B1D5C3_C5D6E7F8, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_sub(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 sub v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_sub(0, 0) == [0, 0] +; run: %atomic_sub(1, 0) == [1, 1] +; run: %atomic_sub(0, 1) == [-1, 0] +; run: %atomic_sub(1, 1) == [0, 1] +; run: %atomic_sub(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0x3F001111_543210FF_DECAFFFF_12345678, 0xDECAFFFF_12345678] +; run: %atomic_sub(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0xC2233456_3579BCF0_E6E42A3A_5E91C4F8, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_and(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 and v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_and(0, 0) == [0, 0] +; run: %atomic_and(1, 0) == [0, 1] +; run: %atomic_and(0, 1) == [0, 0] +; run: %atomic_and(1, 1) == [1, 1] +; run: %atomic_and(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0, 0xDECAFFFF_12345678] +; run: %atomic_and(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0xC0DCAA88_22442200_D6C2D5C4_12201000, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_nand(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 nand v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_nand(0, 0) == [-1, 0] +; run: %atomic_nand(1, 0) == [-1, 1] +; run: %atomic_nand(0, 1) == [-1, 0] +; run: %atomic_nand(1, 1) == [-2, 1] +; run: %atomic_nand(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [-1, 0xDECAFFFF_12345678] +; run: 
%atomic_nand(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0x3F235577_DDBBDDFF_293D2A3B_EDDFEFFF, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_or(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 or v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_or(0, 0) == [0, 0] +; run: %atomic_or(1, 0) == [1, 1] +; run: %atomic_or(0, 1) == [1, 0] +; run: %atomic_or(1, 1) == [1, 1] +; run: %atomic_or(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xDECAFFFF_12345678] +; run: %atomic_or(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0xFEFFFEFE_FFDDFF11_FFEEFFFF_B3B6D7F8, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_xor(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 xor v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_xor(0, 0) == [0, 0] +; run: %atomic_xor(1, 0) == [1, 1] +; run: %atomic_xor(0, 1) == [1, 0] +; run: %atomic_xor(1, 1) == [0, 1] +; run: %atomic_xor(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xDECAFFFF_12345678] +; run: %atomic_xor(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0x3E235476_DD99DD11_292C2A3B_A196C7F8, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_xchg(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 xchg v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_xchg(0, 0) == [0, 0] +; run: %atomic_xchg(1, 0) == [0, 1] +; run: %atomic_xchg(0, 1) == [1, 0] +; run: %atomic_xchg(1, 1) == [1, 1] +; run: 
%atomic_xchg(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0xC0FFEEEE_ABCDEF01_00000000_00000000, 0xDECAFFFF_12345678] +; run: %atomic_xchg(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0xFEDCBA98_76543210_F7E6D5C4_B3A29180, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_umin(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 umin v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_umin(0, 0) == [0, 0] +; run: %atomic_umin(1, 0) == [0, 1] +; run: %atomic_umin(0, 1) == [0, 0] +; run: %atomic_umin(1, 1) == [1, 1] +; run: %atomic_umin(-1, 1) == [1, -1] +; run: %atomic_umin(1, -1) == [1, 1] +; run: %atomic_umin(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0xDECAFFFF_12345678, 0xDECAFFFF_12345678] +; run: %atomic_umin(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_umax(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 umax v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_umax(0, 0) == [0, 0] +; run: %atomic_umax(1, 0) == [1, 1] +; run: %atomic_umax(0, 1) == [1, 0] +; run: %atomic_umax(1, 1) == [1, 1] +; run: %atomic_umax(-1, 1) == [-1, -1] +; run: %atomic_umax(1, -1) == [-1, 1] +; run: %atomic_umax(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0xC0FFEEEE_ABCDEF01_00000000_00000000, 0xDECAFFFF_12345678] +; run: %atomic_umax(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0xFEDCBA98_76543210_F7E6D5C4_B3A29180, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_smin(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, 
v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 smin v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_smin(0, 0) == [0, 0] +; run: %atomic_smin(1, 0) == [0, 1] +; run: %atomic_smin(0, 1) == [0, 0] +; run: %atomic_smin(1, 1) == [1, 1] +; run: %atomic_smin(-1, 1) == [-1, -1] +; run: %atomic_smin(1, -1) == [-1, 1] +; run: %atomic_smin(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0xC0FFEEEE_ABCDEF01_00000000_00000000, 0xDECAFFFF_12345678] +; run: %atomic_smin(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] + + +function %atomic_smax(i128, i128) -> i128, i128 { + ss0 = explicit_slot 16 + +block0(v0: i128, v1: i128): + stack_store.i128 v0, ss0 + v2 = stack_addr.i64 ss0 + v3 = atomic_rmw.i128 smax v2, v1 + v4 = stack_load.i128 ss0 + return v4, v3 +} + +; run: %atomic_smax(0, 0) == [0, 0] +; run: %atomic_smax(1, 0) == [1, 1] +; run: %atomic_smax(0, 1) == [1, 0] +; run: %atomic_smax(1, 1) == [1, 1] +; run: %atomic_smax(-1, 1) == [1, -1] +; run: %atomic_smax(1, -1) == [1, 1] +; run: %atomic_smax(0xDECAFFFF_12345678, 0xC0FFEEEE_ABCDEF01_00000000_00000000) == [0xDECAFFFF_12345678, 0xDECAFFFF_12345678] +; run: %atomic_smax(0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678, 0xFEDCBA98_76543210_F7E6D5C4_B3A29180) == [0xFEDCBA98_76543210_F7E6D5C4_B3A29180, 0xC0FFEEEE_ABCDEF01_DECAFFFF_12345678] diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index d8145acfb4cc..382a2ac19824 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -1051,6 +1051,12 @@ static OPCODE_SIGNATURES: Lazy> = Lazy::new(|| { (Opcode::FcvtFromSint, &[I8X16], &[F64X2]), (Opcode::FcvtFromSint, &[I16X8], &[F64X2]), (Opcode::FcvtFromSint, &[I32X4], &[F64X2]), + // Only supported on x64 with a feature at this time, so 128-bit + // 
atomics are not suitable to fuzz yet. + (Opcode::AtomicRmw, _, &[I128]), + (Opcode::AtomicCas, _, &[I128]), + (Opcode::AtomicLoad, _, &[I128]), + (Opcode::AtomicStore, &[I128, _], _), ) }) .filter(|(op, ..)| { diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs index a9d638b188ba..f159a33d5ed7 100644 --- a/cranelift/native/src/lib.rs +++ b/cranelift/native/src/lib.rs @@ -49,6 +49,9 @@ pub fn infer_native_flags(isa_builder: &mut dyn Configurable) -> Result<(), &'st return Err("x86 support requires SSE2"); } + if std::is_x86_feature_detected!("cmpxchg16b") { + isa_builder.enable("has_cmpxchg16b").unwrap(); + } if std::is_x86_feature_detected!("sse3") { isa_builder.enable("has_sse3").unwrap(); } diff --git a/crates/fuzzing/Cargo.toml b/crates/fuzzing/Cargo.toml index 83728c855829..3dfefb2e3ade 100644 --- a/crates/fuzzing/Cargo.toml +++ b/crates/fuzzing/Cargo.toml @@ -29,7 +29,7 @@ wasm-encoder = { workspace = true } wasm-smith = { workspace = true } wasm-mutate = { workspace = true } wasm-spec-interpreter = { path = "./wasm-spec-interpreter", optional = true } -wasmi = "0.31.1" +wasmi = "0.38.0" futures = { workspace = true } # We rely on precompiled v8 binaries, but rusty-v8 doesn't have a precompiled diff --git a/crates/fuzzing/src/generators/codegen_settings.rs b/crates/fuzzing/src/generators/codegen_settings.rs index 62d58c4e85da..c394c2326c02 100644 --- a/crates/fuzzing/src/generators/codegen_settings.rs +++ b/crates/fuzzing/src/generators/codegen_settings.rs @@ -98,6 +98,7 @@ impl<'a> Arbitrary<'a> for CodegenSettings { "x86_64" => { test: is_x86_feature_detected, + std:"cmpxchg16b" => clif:"has_cmpxchg16b", std:"sse3" => clif:"has_sse3", std:"ssse3" => clif:"has_ssse3", std:"sse4.1" => clif:"has_sse41", diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index 782a1aaf9b07..791cce252790 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -9,7 +9,7 @@ 
use anyhow::Result; use arbitrary::{Arbitrary, Unstructured}; use std::sync::Arc; use std::time::Duration; -use wasmtime::{Engine, Module, Store}; +use wasmtime::{Engine, Module, MpkEnabled, Store}; /// Configuration for `wasmtime::Config` and generated modules for a session of /// fuzzing. @@ -78,6 +78,12 @@ impl Config { pooling.total_memories = config.max_memories as u32; pooling.max_memory_size = 10 << 16; pooling.max_memories_per_module = config.max_memories as u32; + if pooling.memory_protection_keys == MpkEnabled::Auto + && pooling.max_memory_protection_keys > 1 + { + pooling.total_memories = + pooling.total_memories * (pooling.max_memory_protection_keys as u32); + } pooling.total_tables = config.max_tables as u32; pooling.table_elements = 1_000; diff --git a/crates/fuzzing/src/generators/pooling_config.rs b/crates/fuzzing/src/generators/pooling_config.rs index 46e588ca05b4..7fbe8c171472 100644 --- a/crates/fuzzing/src/generators/pooling_config.rs +++ b/crates/fuzzing/src/generators/pooling_config.rs @@ -69,6 +69,7 @@ impl PoolingAllocationConfig { cfg.async_stack_keep_resident(self.async_stack_keep_resident); cfg.memory_protection_keys(self.memory_protection_keys); + cfg.max_memory_protection_keys(self.max_memory_protection_keys); cfg } @@ -115,7 +116,7 @@ impl<'a> Arbitrary<'a> for PoolingAllocationConfig { async_stack_keep_resident: u.int_in_range(0..=1 << 20)?, memory_protection_keys: *u.choose(&[MpkEnabled::Auto, MpkEnabled::Disable])?, - max_memory_protection_keys: u.int_in_range(0..=20)?, + max_memory_protection_keys: u.int_in_range(1..=20)?, }) } } diff --git a/crates/fuzzing/src/oracles/diff_wasmi.rs b/crates/fuzzing/src/oracles/diff_wasmi.rs index 0c8a3d95efb1..0a523f5eb17a 100644 --- a/crates/fuzzing/src/oracles/diff_wasmi.rs +++ b/crates/fuzzing/src/oracles/diff_wasmi.rs @@ -21,8 +21,6 @@ impl WasmiEngine { config.exceptions_enabled = false; config.gc_enabled = false; config.wide_arithmetic_enabled = false; - config.max_memories = 
config.max_memories.min(1); - config.min_memories = config.min_memories.min(1); let mut wasmi_config = wasmi::Config::default(); wasmi_config @@ -35,11 +33,32 @@ impl WasmiEngine { .wasm_bulk_memory(config.bulk_memory_enabled) .wasm_reference_types(config.reference_types_enabled) .wasm_tail_call(config.tail_call_enabled) + .wasm_multi_memory(config.max_memories > 1) .wasm_extended_const(true); Self { engine: wasmi::Engine::new(&wasmi_config), } } + + fn trap_code(&self, err: &Error) -> Option { + let err = err.downcast_ref::()?; + if let Some(code) = err.as_trap_code() { + return Some(code); + } + + match err.kind() { + wasmi::errors::ErrorKind::Instantiation( + wasmi::errors::InstantiationError::ElementSegmentDoesNotFit { .. }, + ) => Some(wasmi::core::TrapCode::TableOutOfBounds), + wasmi::errors::ErrorKind::Memory(wasmi::errors::MemoryError::OutOfBoundsAccess) => { + Some(wasmi::core::TrapCode::MemoryOutOfBounds) + } + _ => { + log::trace!("unknown wasmi error: {:?}", err.kind()); + None + } + } + } } impl DiffEngine for WasmiEngine { @@ -59,53 +78,17 @@ impl DiffEngine for WasmiEngine { } fn assert_error_match(&self, trap: &Trap, err: &Error) { - // Acquire a `wasmi::Trap` from the wasmi error which we'll use to - // assert that it has the same kind of trap as the wasmtime-based trap. - let wasmi = match err.downcast_ref::() { - Some(wasmi::Error::Trap(trap)) => trap, - - // Out-of-bounds data segments turn into this category which - // Wasmtime reports as a `MemoryOutOfBounds`. - Some(wasmi::Error::Memory(msg)) => { - assert_eq!( - *trap, - Trap::MemoryOutOfBounds, - "wasmtime error did not match wasmi: {msg}" - ); - return; - } - - // Ignore this for now, looks like "elements segment does not fit" - // falls into this category and to avoid doing string matching this - // is just ignored. 
- Some(wasmi::Error::Instantiation(msg)) => { - log::debug!("ignoring wasmi instantiation error: {msg}"); - return; - } - - Some(other) => panic!("unexpected wasmi error: {other}"), - - None => err - .downcast_ref::() - .expect(&format!("not a trap: {err:?}")), - }; - assert!(wasmi.trap_code().is_some()); - assert_eq!( - wasmi_to_wasmtime_trap_code(wasmi.trap_code().unwrap()), - *trap - ); + match self.trap_code(err) { + Some(code) => assert_eq!(wasmi_to_wasmtime_trap_code(code), *trap), + None => panic!("unexpected wasmi error {err:?}"), + } } fn is_stack_overflow(&self, err: &Error) -> bool { - let trap = match err.downcast_ref::() { - Some(wasmi::Error::Trap(trap)) => trap, - Some(_) => return false, - None => match err.downcast_ref::() { - Some(trap) => trap, - None => return false, - }, - }; - matches!(trap.trap_code(), Some(wasmi::core::TrapCode::StackOverflow)) + matches!( + self.trap_code(err), + Some(wasmi::core::TrapCode::StackOverflow) + ) } } @@ -150,7 +133,7 @@ impl DiffInstance for WasmiInstance { .and_then(wasmi::Extern::into_func) .unwrap(); let arguments: Vec<_> = arguments.iter().map(|x| x.into()).collect(); - let mut results = vec![wasmi::Value::I32(0); result_tys.len()]; + let mut results = vec![wasmi::Val::I32(0); result_tys.len()]; function .call(&mut self.store, &arguments, &mut results) .context("wasmi function trap")?; @@ -183,9 +166,9 @@ impl DiffInstance for WasmiInstance { } } -impl From<&DiffValue> for wasmi::Value { +impl From<&DiffValue> for wasmi::Val { fn from(v: &DiffValue) -> Self { - use wasmi::Value as WasmiValue; + use wasmi::Val as WasmiValue; match *v { DiffValue::I32(n) => WasmiValue::I32(n), DiffValue::I64(n) => WasmiValue::I64(n), @@ -205,9 +188,9 @@ impl From<&DiffValue> for wasmi::Value { } } -impl From for DiffValue { - fn from(value: wasmi::Value) -> Self { - use wasmi::Value as WasmiValue; +impl From for DiffValue { + fn from(value: wasmi::Val) -> Self { + use wasmi::Val as WasmiValue; match value { WasmiValue::I32(n) 
=> DiffValue::I32(n), WasmiValue::I64(n) => DiffValue::I64(n), diff --git a/crates/fuzzing/src/oracles/engine.rs b/crates/fuzzing/src/oracles/engine.rs index 5c7cf69c547a..887f8382b36c 100644 --- a/crates/fuzzing/src/oracles/engine.rs +++ b/crates/fuzzing/src/oracles/engine.rs @@ -93,14 +93,17 @@ pub fn setup_engine_runtimes() { /// Build a list of allowed values from the given `defaults` using the /// `env_list`. /// +/// The entries in `defaults` are preserved, in order, and are replaced with +/// `None` in the returned list if they are disabled. +/// /// ``` /// # use wasmtime_fuzzing::oracles::engine::build_allowed_env_list; /// // Passing no `env_list` returns the defaults: -/// assert_eq!(build_allowed_env_list(None, &["a"]), vec!["a"]); +/// assert_eq!(build_allowed_env_list(None, &["a"]), vec![Some("a")]); /// // We can build up a subset of the defaults: -/// assert_eq!(build_allowed_env_list(Some(vec!["b".to_string()]), &["a","b"]), vec!["b"]); +/// assert_eq!(build_allowed_env_list(Some(vec!["b".to_string()]), &["a","b"]), vec![None, Some("b")]); /// // Alternately we can subtract from the defaults: -/// assert_eq!(build_allowed_env_list(Some(vec!["-a".to_string()]), &["a","b"]), vec!["b"]); +/// assert_eq!(build_allowed_env_list(Some(vec!["-a".to_string()]), &["a","b"]), vec![None, Some("b")]); /// ``` /// ```should_panic /// # use wasmtime_fuzzing::oracles::engine::build_allowed_env_list; @@ -116,7 +119,7 @@ pub fn setup_engine_runtimes() { pub fn build_allowed_env_list<'a>( env_list: Option>, defaults: &[&'a str], -) -> Vec<&'a str> { +) -> Vec> { if let Some(configured) = &env_list { // Check that the names are either all additions or all subtractions. let subtract_from_defaults = configured.iter().all(|c| c.starts_with("-")); @@ -141,12 +144,14 @@ pub fn build_allowed_env_list<'a>( for &d in defaults { let mentioned = configured.iter().any(|c| &c[start..] 
== d); if (add_from_defaults && mentioned) || (subtract_from_defaults && !mentioned) { - allowed.push(d); + allowed.push(Some(d)); + } else { + allowed.push(None); } } allowed } else { - defaults.to_vec() + defaults.iter().copied().map(Some).collect() } } diff --git a/crates/test-macros/src/lib.rs b/crates/test-macros/src/lib.rs index bf59adb89c8f..488ba5e45ab5 100644 --- a/crates/test-macros/src/lib.rs +++ b/crates/test-macros/src/lib.rs @@ -218,7 +218,7 @@ fn expand(test_config: &TestConfig, func: Fn) -> Result { }; let func_name = &func.sig.ident; let ret = match &func.sig.output { - ReturnType::Default => quote! { () }, + ReturnType::Default => quote! {}, ReturnType::Type(_, ty) => quote! { -> #ty }, }; let test_name = Ident::new( diff --git a/crates/wasi-nn/Cargo.toml b/crates/wasi-nn/Cargo.toml index 48ac66e44167..1f218a60434f 100644 --- a/crates/wasi-nn/Cargo.toml +++ b/crates/wasi-nn/Cargo.toml @@ -30,7 +30,7 @@ wasmtime = { workspace = true, features = [ # These dependencies are necessary for the wasi-nn implementation: tracing = { workspace = true } thiserror = { workspace = true } -openvino = { version = "0.7.2", features = [ +openvino = { version = "0.8.0", features = [ "runtime-linking", ], optional = true } diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index dd7fe6c987ee..c8a9b789c564 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -3136,6 +3136,7 @@ fn detect_host_feature(feature: &str) -> Option { #[cfg(target_arch = "x86_64")] { return match feature { + "cmpxchg16b" => Some(std::is_x86_feature_detected!("cmpxchg16b")), "sse3" => Some(std::is_x86_feature_detected!("sse3")), "ssse3" => Some(std::is_x86_feature_detected!("ssse3")), "sse4.1" => Some(std::is_x86_feature_detected!("sse4.1")), diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index e0c0a366001b..f9879eb4b89d 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -440,6 +440,7 
@@ impl Engine { "has_mie2" => "mie2", // x64 features to detect + "has_cmpxchg16b" => "cmpxchg16b", "has_sse3" => "sse3", "has_ssse3" => "ssse3", "has_sse41" => "sse4.1", diff --git a/docs/WASI-background.md b/docs/WASI-background.md deleted file mode 100644 index 6d3abc6884ab..000000000000 --- a/docs/WASI-background.md +++ /dev/null @@ -1,179 +0,0 @@ -One of the biggest challenges in WebAssembly is figuring out what it's -supposed to be. - -## A brief tangent on some related history - -The LLVM WebAssembly backend has gone down countless paths that it has -ended up abandoning. One of the early questions was whether we should use -an existing object file format, such as ELF, or design a new format. - -Using an existing format is very appealing. We'd be able to use existing -tools, and be familiar to developers. It would even make porting some -kinds of applications easier. And existing formats carry with them -decades of "lessons learned" from many people in many settings, building, -running, and porting real-world applications. - -The actual WebAssembly format that gets handed to platforms to run is -its own format, but there'd be ways to make things work. To reuse existing -linkers, we could have a post-processing tool which translates from the -linker's existing output format into a runnable WebAssembly module. We -actually made a fair amount of progress toward building this. - -But then, using ELF for example, we'd need to create a custom segment -type (in the `PT_LOPROC`-`PT_HIPROC` range) instead of the standard -`PT_LOAD` for loading code, because WebAssembly functions aren't actually -loaded into the program address space. And same for the `PT_LOAD` for the -data too, because especially once WebAssembly supports threads, memory -initialization will need to -[work differently](https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md#design). 
-And we could omit the `PT_GNU_STACK`, because WebAssembly's stack can't -be executable. And maybe we could omit `PT_PHDR` because unless -we replicate the segment headers in data, they won't actually be -accessible in memory. And so on. - -And while in theory everything can be done within the nominal ELF -standard, in practice we'd have to make major changes to existing ELF -tools to support this way of using ELF, which would defeat many of the -advantages we were hoping to get. And we'd still be stuck with a custom -post-processing step. And it'd be harder to optimize the system to -take advantage of the unique features of WebAssembly, because everything -would have to work within this external set of constraints. - -So while the LLVM WebAssembly backend started out trying to use ELF, we -eventually decided to back out of that and design a -[new format](https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md). - -## Now let's talk APIs - -It's apparent to anyone who's looked under the covers at Emscripten's interface -between WebAssembly and the outside world that the current system is particular -to the way Emscripten currently works, and not well suited for broader adoption. -This is especially true as interest grows in running WebAssembly outside -of browsers and outside of JS VMs. - -It's been obvious since WebAssembly was just getting started that it'd eventually -want some kind of "system call"-like API, which could be standardized, and -implemented in any general-purpose WebAssembly VM. - -And while there are many existing systems we could model this after, [POSIX] -stands out, as being a vendor-neutral standard with considerable momentum. Many -people, including us, have been assuming that WebAssembly would eventually -have some kind of POSIX API. 
Some people have even started experimenting with -what -[this](https://github.com/WAVM/Wavix/) -[might](https://github.com/jfbastien/musl) -[look](https://github.com/golang/go/blob/e5489cfc12a99f25331831055a79750bfa227943/misc/wasm/wasm_exec.js) -[like](https://github.com/emscripten-core/emscripten/blob/incoming/src/library_syscall.js). - -But while a lot of things map fairly well, some things are less clear. One of -the big questions is how to deal with the concept of a "process". POSIX's IPC -mechanisms are designed around process, and in fact, the term "IPC" itself -has "process" baked into it. The way we even think about what "IPC" means -bakes in in understandings about what processes are and what communication -between them looks like. - -Pipes, Unix-domain sockets, POSIX shared memory, signals, files with `fcntl` -`F_SETLK`/`F_GETLK`-style locking (which is process-associated), are tied -to processes. But what *is* a process, when we're talking about WebAssembly? - -## Stick a fork in it - -Suppose we say that a WebAssembly instance is a "process", for the purposes -of the POSIX API. This initially seems to work out well, but it leaves us -with several holes to fill. Foremost is `fork`. `fork` is one of the pillars -of Unix, but it's difficult to implement outside of a full Unix-style OS. We -probably *can* make it work in all the places we want to run WebAssembly, but -do we want to? It'd add a bunch of complexity, inefficiency, subtle behavioral -differences, or realistically, a combination of all three. - -Ok, so maybe we can encourage applications to use `posix_spawn` instead. And -some already do, but in doing so we do lose some of the value of POSIX's -momentum. And even with `posix_spawn`, many applications will explicitly do -things like `waitpid` on the resulting PID. We can make this work too, but -we should also take a moment and step back to think about IPC in general. 
- -In WebAssembly, instances can synchronously call each other, and it can be -very efficient. This is not something that typical processes can do. Arguably, -a lot of what we now think of as "IPC" is just working around the inability -of processes to have calls between each other. And, WebAssembly instances will -be able to import each others' memories and tables, and eventually even pass -around slices to their memories. In WebAssembly circles we don't even tend to -think of these as IPC mechanisms, because the process metaphor just doesn't -fit very well here. We're going to want applications to use these mechanisms, -because they're efficient and take advantage of the platform, rather than -using traditional Unix-style IPC which will often entail emulation and -inefficiencies. - -Of course, there will always be a role for aiding porting of existing -applications. Libraries that emulate various details of Unix semantics are -valuable. But we can consider them tools for solving certain practical -problems, rather than the primary interfaces of the system, because they -miss out on some of the platform's fundamental features. - -## Mm-Mm Mmap - -Some of the fundamental assumptions of `mmap` are that there exists a -relatively large virtual address space, and that unmapped pages don't -occupy actual memory. The former doesn't tend to hold in WebAssembly, -where linear address spaces tend to be only as big as necessary. - -For the latter, would it be possible to make a WebAssembly engine capable -of unmapping pages in the middle of a linear memory region, and releasing -the resources? Sure. Is this a programming technique we want WebAssembly -programs doing in general, requiring all VMs to implement this? -Probably not. - -What's emerging is a sense that what we want is a core set of -APIs that can be implemented very broadly, and then optional API -modules that VMs can opt into supporting if it makes sense for them. 
-And with this mindset, `mmap` feels like it belongs in one of these -optional sets, rather than in the core. - -(although note that even for the use case of reading files quickly, -`mmap` -[isn't always better than just reading into a buffer](https://blog.burntsushi.net/ripgrep/). - -## A WebAssembly port of Debian? - -This is a thought-experiment. Debian is ported to numerous hardware -architectures. WebAssembly in some settings is presented as a hardware -architecture. Would it make sense to port the Debian userspace to -WebAssembly? What would this look like? What would it be useful for? - -It would be kind of cool to have a WebAssembly-powered Unix shell -environment or even a graphical desktop environment running inside a -browser. But would it be *really* cool? Significantly more cool than, -say, an SSH or VNC session to an instance in the cloud? Because to do -much with it, you'll want a filesystem, a network stack, and so on, -and there's only so much that browsers will let you do. - -To be sure, it certainly would be cool. But there's a tendency in -some circles to think of something like Debian as the natural end goal -in a system API and toolchain for WebAssembly. We feel this tendency -too ourselves. But it's never really been clear how it's supposed to -work. - -The insight here is that we can split the design space, rather than -trying to solve everything at once. We can have a core set of APIs -that will be enough for most applications, but that doesn't try to -support all of Debian userland. This will make implementations more -portable, flexible, testable, and robust than if we tried to make -every implementation support everything, or come up with custom -subsets. - -As mentioned above, there is room for additional optional APIs to be -added beyond the core WASI set. And there's absolutely a place for -tools and libraries that features that aren't in the standard -platform. 
So people interested in working on a Debian port can still -have a path forward, but we don't need to let this become a focus for -the core WASI design. - -## A picture emerges - -While much of what's written here seems relatively obvious in -retrospect, this clarity is relatively new. We're now seeing many of the -ideas which have been swirling around, some as old as WebAssembly -itself, come together into a cohesive overall plan, which makes this -an exciting time. - -[POSIX]: http://pubs.opengroup.org/onlinepubs/9699919799/ diff --git a/docs/WASI-capabilities.md b/docs/WASI-capabilities.md index 7ed73a42ff3c..1d8cddb7f18f 100644 --- a/docs/WASI-capabilities.md +++ b/docs/WASI-capabilities.md @@ -1,81 +1,6 @@ # Additional background on Capabilities -## Unforgeable references +For more information about capabilities in WASI, see the WASI Subgroup's +[capabilities documentation]. -One of the key words that describes capabilities is *unforgeable*. - -A pointer in C is forgeable, because untrusted code could cast an integer -to a pointer, thus *forging* access to whatever that pointer value points -to. - -MVP WebAssembly doesn't have unforgeable references, but what we can do instead -is just use integer values which are indices into a table that's held outside -the reach of untrusted code. The indices themselves are forgeable, but -ultimately the table is the thing which holds the actual capabilities, and -its elements are unforgeable. There's no way to gain access to a new resource -by making up a new index. - -When the reference-types proposal lands, references will be unforgeable, and -will likely subsume the current integer-based APIs, at the WASI API layer. - -## Static vs dynamic capabilities - -There are two levels of capabilities that we can describe: static and dynamic. - -The static capabilities of a wasm module are its imports. These essentially -declare the set of "rights" the module itself will be able to request. 
-An important caveat though is that this doesn't consider capabilities which -may be passed into an instance at runtime. - -The dynamic capabilities of a wasm module are a set of boolean values -associated with a file descriptor, indicating individual "rights". This -includes things like the right to read, or to write, using a given file -descriptor. - -## Filesystem rules - -It happens that integer indices representing capabilities is same thing that -POSIX does, except that POSIX calls these indices *file descriptors*. - -One difference though is that POSIX normally allows processes to request -a file descriptor for any file in the entire filesystem hierarchy, which is -granted based on whatever security policies are in place. This doesn't -violate the capability model, but it doesn't take full advantage of it. - -CloudABI, Fuchsia, and other capability-oriented systems prefer to take -advantage of the hierarchical nature of the filesystem and require untrusted -code to have a capability for a directory in order to access things inside -that directory. - -This way, you can launch untrusted code, and at runtime give it access to -specific directories, without having to set permissions in the filesystem or -in per-application or per-user configuration settings. - -See [this tutorial](WASI-tutorial.md) for an example of how this can look -in practice. - -## Berkeley socket rules - -Sockets aren't naturally hierarchical though, so we'll need to decide what -capabilities look like. This is an area that isn't yet implemented. - -In CloudABI, users launch programs with the sockets they need already -created. That's potentially a starting point, which might be enough for -simple cases. - -We also anticipate an eventual extension to that, where we create a capability -that represents a set of possible sockets that can be created. A set -might be described by ranges of permitted ports, ranges of permitted -addresses, or sets of permitted protocols. 
In this case the actual socket -wouldn't be created until the application actually requests it. - -## Other info - -CloudABI's intro to capability-based OS security provides additional background info: - -https://github.com/NuxiNL/cloudabi#capability-based-security - - -The Fuchsia project has a blog post on the topic of capability-based OS security: - -https://fuchsia.dev/fuchsia-src/concepts/filesystems/dotdot +[capabilities documentation]: https://github.com/WebAssembly/WASI/blob/main/docs/Capabilities.md diff --git a/docs/WASI-documents.md b/docs/WASI-documents.md index 5734cb92b060..1d2c930a2e7f 100644 --- a/docs/WASI-documents.md +++ b/docs/WASI-documents.md @@ -1,28 +1,5 @@ # WASI Document Guide -To get started using WASI, see [the intro document](WASI-intro.md) and -[the tutorial](WASI-tutorial.md). +For documentation about WASI, see the [Resources page on wasi.dev]. -For more detail on what WASI is, see [the overview](WASI-overview.md). - -For specifics on the API, see the [API documentation](https://github.com/WebAssembly/WASI/blob/main/legacy/preview1/docs.md). -Additionally, a C header file describing the WASI API is -[here](https://github.com/WebAssembly/wasi-libc/blob/master/libc-bottom-half/headers/public/wasi/api.h). - -The WASI C/C++ SDK repository is [wasi-sdk](https://github.com/WebAssembly/wasi-sdk/). - -The WASI libc repository, used by wasi-sdk, is [wasi-libc](https://github.com/WebAssembly/wasi-libc/). - -For some discussion of capability-based design, see the [Capabilities document](WASI-capabilities.md). - -For some discussion of WASI's design inspiration, see the [Background document](WASI-background.md). - -For background on some of the design decisions in WASI, see [the rationale](WASI-rationale.md). - -For documentation of the exports required of programs using, see -[the application ABI](https://github.com/WebAssembly/WASI/blob/main/legacy/application-abi.md). 
- -For some ideas of things that we may want to change about WASI in the -short term, see the [possible changes](WASI-some-possible-changes.md) document. -For longer-term ideas, see the [possible future features](WASI-possible-future-features.md) -document. +[Resources page on wasi.dev]: https://wasi.dev/resources diff --git a/docs/WASI-intro.md b/docs/WASI-intro.md index 76984c8d4a50..f399054956f4 100644 --- a/docs/WASI-intro.md +++ b/docs/WASI-intro.md @@ -1,60 +1,9 @@ # Welcome to WASI! -WASI stands for WebAssembly System Interface. It's an API designed by -the [Wasmtime] project that provides access to several operating-system-like -features, including files and filesystems, Berkeley sockets, clocks, and -random numbers, that we'll be proposing for standardization. +WebAssembly System Interface, or WASI, is a family of APIs being designed +by the [WASI Subgroup] of the Wasm Community Group of the W3C. -It's designed to be independent of browsers, so it doesn't depend on -Web APIs or JS, and isn't limited by the need to be compatible with JS. -And it has integrated capability-based security, so it extends -WebAssembly's characteristic sandboxing to include I/O. +To learn more about WASI, see [wasi.dev]. -See the [WASI Overview](WASI-overview.md) for more detailed background -information, and the [WASI Tutorial](WASI-tutorial.md) for a walkthrough -showing how various pieces fit together. - -Note that everything here is a prototype, and while a lot of stuff works, -there are numerous missing features and some rough edges. For example, -networking support is incomplete. - -## How can I write programs that use WASI? - -The two toolchains that currently work well are the Rust toolchain and -a specially packaged C and C++ toolchain. Of course, we hope other -toolchains will be able to implement WASI as well! 
- -### Rust - -To install a WASI-enabled Rust toolchain, see the [online section of the -guide](https://bytecodealliance.github.io/wasmtime/examples-rust-embed.html) - -### C/C++ - -To install a WASI-enabled C/C++ toolchain, see the [online section of the -guide](https://bytecodealliance.github.io/wasmtime/lang-c.html) - -## How can I run programs that use WASI? - -Currently the options are [Wasmtime] and the [browser polyfill], though we -intend WASI to be implementable in many wasm VMs. - -[Wasmtime]: https://github.com/bytecodealliance/wasmtime -[browser polyfill]: https://github.com/bjorn3/browser_wasi_shim - -### Wasmtime - -[Wasmtime] is a non-Web WebAssembly engine which is part of the -[Bytecode Alliance project](https://bytecodealliance.org). To build -it, download the code and build with `cargo build --release`. It can -run WASI-using wasm programs by simply running `wasmtime foo.wasm`, -or `cargo run --bin wasmtime foo.wasm`. - -### The browser polyfill - -The source is [here](https://github.com/bjorn3/browser_wasi_shim). - -## Where can I learn more? - -Beyond the [WASI Overview](WASI-overview.md), take a look at the -various [WASI documents](WASI-documents.md). +[WASI Subgroup]: https://github.com/WebAssembly/wasi +[wasi.dev]: https://wasi.dev/ diff --git a/docs/WASI-overview.md b/docs/WASI-overview.md index 8c3f465aa6fb..f9e5a28027d1 100644 --- a/docs/WASI-overview.md +++ b/docs/WASI-overview.md @@ -1,163 +1,9 @@ # WASI: WebAssembly System Interface -WebAssembly System Interface, or WASI, is a new family of API's being -designed by the [Wasmtime] project to propose as a standard engine-independent -non-Web system-oriented API for WebAssembly. Initially, the focus is on -WASI Core, an API module that covers files, networking, and a few other -things. Additional modules are expected to be added in the future. +WebAssembly System Interface, or WASI, is a family of APIs being designed +by the [WASI Subgroup] of the Wasm Community Group of the W3C. 
-WebAssembly is designed to run well on the Web, however it's -[not limited to the Web](https://github.com/WebAssembly/design/blob/master/NonWeb.md). -The core WebAssembly language is independent of its surrounding -environment, and WebAssembly interacts with the outside world -exclusively through APIs. On the Web, it naturally uses the -existing Web APIs provided by browsers. However outside of -browsers, there's currently no standard set of APIs that -WebAssembly programs can be written to. This makes it difficult to -create truly portable non-Web WebAssembly programs. +To learn more about WASI, see the [Interfaces page on wasi.dev]. -WASI is an initiative to fill this gap, with a clean set of APIs -which can be implemented on multiple platforms by multiple engines, -and which don't depend on browser functionality (although they -still can run in browsers; see below). - -## Capability-Oriented - -The design follows -[CloudABI](https://github.com/NuxiNL/cloudlibc)'s -(and in turn -[Capsicum](https://www.cl.cam.ac.uk/research/security/capsicum/))'s concept of -[capability-based security](https://en.wikipedia.org/wiki/Capability-based_security), -which fits well into WebAssembly's sandbox model. Files, -directories, network sockets, and other resources are identified -by UNIX-like file descriptors, which are indices into external -tables whose elements represent capabilities. Similar to how core -WebAssembly provides no ability to access the outside world without -calling imported functions, WASI APIs provide no ability to access -the outside world without an associated capability. - -For example, instead of a typical -[open](http://pubs.opengroup.org/onlinepubs/009695399/functions/open.html) -system call, WASI provides an -[openat](https://linux.die.net/man/2/openat)-like -system call, requiring the calling process to have a file -descriptor for a directory that contains the file, representing the -capability to open files within that directory. 
(These ideas are -common in capability-based systems.) - -However, the WASI libc implementation still does provide an -implementation of open, by taking the approach of -[libpreopen](https://github.com/musec/libpreopen). -Programs may be granted capabilities for directories on launch, and -the library maintains a mapping from their filesystem path to the -file descriptor indices representing the associated capabilities. -When a program calls open, they look up the file name in the map, -and automatically supply the appropriate directory capability. It -also means WASI doesn't require the use of CloudABI's `program_main` -construct. This eases porting of existing applications without -compromising the underlying capability model. See the diagram below -for how libpreopen fits into the overall software architecture. - -WASI also automatically provides file descriptors for standard -input and output, and WASI libc provides a normal `printf`. In -general, WASI is aiming to support a fairly full-featured libc -implementation, with the current implementation work being based on -[musl](http://www.musl-libc.org/). - -## Portable System Interface for WebAssembly - -WASI is being designed from the ground up for WebAssembly, with -sandboxing, portability, and API tidiness in mind, making natural -use of WebAssembly features such as i64, import functions with -descriptive names and typed arguments, and aiming to avoid being -tied to a particular implementation. - -We'll often call functions in these APIs "syscalls", because they -serve an analogous purpose to system calls in native executables. -However, they're just functions that are provided by the -surrounding environment that can do I/O on behalf of the program. 
- -WASI is starting with a basic POSIX-like set of syscall functions, -though adapted to suit the needs of WebAssembly, such as in -excluding functions such as fork and exec which aren't easily -implementable in some of the places people want to run WebAssembly, -and such as in adopting a capabilities-oriented design. - -And, as WebAssembly grows support for -[host bindings](https://github.com/webassembly/host-bindings) -and related features, capabilities can evolve to being represented -as opaque, unforgeable -[reference typed values](https://github.com/WebAssembly/reference-types), -which can allow for finer-grained control over capabilities, and -make the API more accessible beyond the C-like languages that -POSIX-style APIs are typically aimed at. - -## WASI Software Architecture - -To facilitate use of the WASI API, a libc -implementation called WASI libc is being developed, which presents -a relatively normal musl-based libc interface, implemented on top -of a libpreopen-like layer and a system call wrapper layer (derived -from the "bottom half" of -[cloudlibc](https://github.com/NuxiNL/cloudlibc)). -The system call wrapper layer makes calls to the actual WASI -implementation, which may map these calls to whatever the -surrounding environment provides, whether it's native OS resources, -JS runtime resources, or something else entirely. - -[This libc is part of a "sysroot"](https://github.com/WebAssembly/reference-sysroot), -which is a directory containing compiled libraries and C/C++ header -files providing standard library and related facilities laid out in -a standard way to allow compilers to use it directly. - -With the [LLVM 8.0](http://llvm.org/) -release, the WebAssembly backend is now officially stable, but LLVM -itself doesn't provide a libc - a standard C library, which you -need to build anything with clang. 
This is what the WASI-enabled -sysroot provides, so the combination of clang in LLVM 8.0 and the -new WASI-enabled sysroot provides usable Rust and C compilation -environments that can produce wasm modules that can be run in -[Wasmtime] with WASI support, in browsers with the WASI polyfill, -and in the future other engines as well. - -![WASI software architecture diagram](wasi-software-architecture.png "WASI software architecture diagram") - -## Future Evolution - -The first version of WASI is relatively simple, small, and -POSIX-like in order to make it easy for implementers to prototype -it and port existing code to it, making it a good way to start -building momentum and allow us to start getting feedback based on -experience. - -Future versions will change based on experience -and feedback with the first version, and add features to address -new use cases. They may also see significant architectural -changes. One possibility is that this API could -evolve into something like -[Fuchsia](https://en.wikipedia.org/wiki/Google_Fuchsia)'s -low-level APIs, which are more complex and abstract, though also -more capable. - -We also expect that whatever WASI evolves into in the future, it -should be possible to implement this initial API as a library -on top. - -## Can WASI apps run on the Web? - -Yes! We have a polyfill which implements WASI and runs in browsers. -At the WebAssembly level, WASI is just a set of callable functions that -can be imported by a .wasm module, and these imports can be implemented -in a variety of ways, including by a JavaScript polyfill library running -within browsers. - -And in the future, it's possible that -[builtin modules](https://github.com/tc39/ecma262/issues/395) -could take these ideas even further allowing easier and tighter -integration between .wasm modules importing WASI and the Web. - -## Work in Progress - -WASI is currently experimental. Feedback is welcome! 
- -[Wasmtime]: https://github.com/bytecodealliance/wasmtime +[WASI Subgroup]: https://github.com/WebAssembly/wasi +[Interfaces page on wasi.dev]: https://wasi.dev/interfaces diff --git a/docs/WASI-possible-future-features.md b/docs/WASI-possible-future-features.md deleted file mode 100644 index 77f676b0ebdb..000000000000 --- a/docs/WASI-possible-future-features.md +++ /dev/null @@ -1,49 +0,0 @@ -# Possible Future Features - -These are some features we're interested in, but don't have yet, and which will -require some amount of design work. - -## File Locking - -POSIX's answer is `fcntl` with `F_SETLK`/`F_GETLK`/etc., which provide advisory -record locking. Unfortunately, these locks are associated with processes, which -means that if two parts of a program independently open a file and try to lock -it, if they're in the same process, they automatically share the lock. - -Other locking APIs exist on various platforms, but none is widely standardized. - -POSIX `F_SETLK`-style locking is used by SQLite. - -## File change monitoring - -POSIX has no performant way to monitor many files or directories for changes. - -Many popular operating systems have system-specific APIs to do this though, so -it'd be desirable to come up with a portable API to provide access to this -functionality. - -## Scalable event-based I/O - -POSIX's `select` and `poll` have the property that each time they're called, -the implementation has to scan through all the file descriptors to report if any -of them has I/O ready, which is inefficient when there are large numbers of -open files or sockets. - -Many popular operating systems have system-specific APIs that provide -alternative ways to monitor large numbers of I/O streams though, so it'd be -desirable to come up with a portable API to provide access to this -functionality. 
- -## Crash recovery - -POSIX doesn't have clear guidance on what applications can expect their -data will look like if the system crashes or the storage device is otherwise -taken offline abruptly. - -We have `fsync` and `fdatasync`, but even these have been a topic of -[much discussion]. - -[much discussion]: https://wiki.postgresql.org/wiki/Fsync_Errors - -Also, currently WASI's docs don't make any guarantees about things like -`path_rename` being atomic. diff --git a/docs/WASI-rationale.md b/docs/WASI-rationale.md deleted file mode 100644 index d1c8e093858e..000000000000 --- a/docs/WASI-rationale.md +++ /dev/null @@ -1,160 +0,0 @@ -## Why not a more traditional set of POSIX-like syscalls? - -In related work, the LLVM wasm backend started out trying to use ELF object -files for wasm, to be as conventional as possible. But wasm doesn't fit into -ELF in some very fundamental ways. Code isn't in the address space, callers -have to know their callee's exact signatures, imports and exports don't have -ELF semantics, function pointers require tables to be populated, index 0 is -valid in some contexts where it isn't in ELF, and so on. It ultimately got -to the point where the work we were considering doing to *emulate* ELF -interfaces to make existing tools happy looked like more than the work that -would be required to just build new tools. - -The analogy isn't perfect, but there are some parallels to what we're now -figuring out about system calls. Many people, including us, had initially -assumed that at least some parts of the wasm ecosystem would eventually -standardize on a basic map of POSIX-like or Linux-like system calls into wasm -imports. However, this turns out to be more complex than it initially seems. - -One of WebAssembly's unique attributes is the ability to run sandboxed -without relying on OS process boundaries. 
Requiring a 1-to-1 correspondence -between wasm instances and heavyweight OS processes would take away this key -advantage for many use cases. Fork/exec are the obvious example of an API -that's difficult to implement well if you don't have POSIX-style processes, -but a lot of other things in POSIX are tied to processes too. So it isn't -a simple matter to take POSIX, or even a simple subset of it, to WebAssembly. - -We should note that Spectre concerns are relevant here, though for now we'll -just observe that actual security depends on the details of implementations -and use cases, and it's not necessarily a show-stopper. - -Another area where WebAssembly differs from traditional POSIX-like platforms -is in its Capability-oriented approach to security. WebAssembly core has no -ability to address the outside world, except through interacting with -imports/exports. And when reference types are added, they'll be able to -represent very fine-grained and dynamic capabilities. - -A capability-oriented system interface fits naturally into WebAssembly's -existing sandbox model, by extending the simple story that a wasm module -can't do anything until given capabilities. There are ways to sandbox -traditional OS filesystem APIs too, but in a multiple-implementation -ecosystem where the methods for setting up path filtering will likely -differ between implementations, designing the platform around capabilities -will make it easier for people to consistently configure the capabilities -available to wasm modules. - -This is where we see WASI heading. - -## Why not non-blocking? - -This is an open question. We're using blocking APIs for now because that's -*by far* the simpler way to get the overall system to a usable state, on -both the wasm runtime side and the toolchain side. But one can make an -argument that non-blocking APIs would have various advantages, so we -look forward to discussing this topic with the WebAssembly CG subgroup -once it's set up. 
- -## Why not async? - -We have some ideas about how the current API could be extended to be async. -In particular, we can imagine making a distinction between WebAssembly -programs which are *Commands* and those which we'll call *Reactors*. -Commands have a `main` function which is called once, and when `main` -exits, the program is complete. Reactors have a setup function, but -once that completes, the instance remains live and is called from callbacks. -In a Reactor, there's an event loop which lives outside of the nominal -program. - -With this distinction, we may be able to say things like: - - In a Reactor, WASI APIs are available, but all functions have an - additional argument, which specifies a function to call as a continuation - once the I/O completes. This way, we can use the same conceptual APIs, - but adapt them to run in an callback-based async environment. - - In a Command, WASI APIs don't have callback parameters. Whether or not - they're non-blocking is an open question (see the previous question). - -Reactors might then be able to run in browsers on the main thread, -while Commands in browsers might be limited to running in Workers. - -## Why no mmap and friends? - -True mmap support is something that could be added in the future, -though it is expected to require integration with the core language. -See "Finer-grained control over memory" in WebAssembly's -[Future Features] document for an overview. - -Ignoring the many non-standard mmap extensions out there, -the core mmap behavior is not portable in several respects, even -across POSIX-style systems. See -[LevelDB's decision to stop using mmap], for one example in -practice, and search for the word "unspecified" in the -[POSIX mmap spec] for some others. - -And, some features of mmap can lead to userspace triggering -signals. Accessing memory beyond the end of the file, including in -the case where someone else changes the size of the file, leads to a -`SIGBUS` on POSIX-style systems. 
Protection modes other than -`PROT_READ|PROT_WRITE` can produce `SIGSEGV`. While some VMs are -prepared to catch such signals transparently, this is a burdensome -requirement for others. - -Another issue is that while WASI is a synchronous I/O API today, -this design may change in the future. `mmap` can create situations -where doing a load can entail blocking I/O, which can make it -harder to characterize all the places where blocking I/O may occur. - -And lastly, WebAssembly linear memory doesn't support the semantics -of mapping and unmapping pages. Most WebAssembly VMs would not -easily be able to support freeing the memory of a page in the middle -of a linear memory region, for example. - -To make things easier for people porting programs that just use -mmap to read and write files in a simple way, WASI libc includes a -minimal userspace emulation of `mmap` and `munmap`. - -[POSIX mmap spec]: http://pubs.opengroup.org/onlinepubs/7908799/xsh/mmap.html -[LevelDB's decision to stop using mmap]: https://groups.google.com/forum/#!topic/leveldb/C5Hh__JfdrQ -[Future Features]: https://webassembly.org/docs/future-features/. - -## Why no UNIX-domain sockets? - -UNIX-domain sockets can communicate three things: - - bytes - - file descriptors - - user credentials - -The concept of "users" doesn't fit within WASI, because many implementations -won't be multi-user in that way. - -It can be useful to pass file descriptor between wasm instances, however in -wasm this can be done by passing them as arguments in plain function calls, -which is much simpler and quicker. And, in WASI implementations where file -descriptors don't correspond to an underlying Unix file descriptor concept, -it's not feasible to do this if the other side of the socket isn't a -cooperating WebAssembly engine. - -We may eventually want to introduce a concept of a WASI-domain socket, for -bidirectional byte-oriented local communication. - -## Why no dup? 
- -The main use cases for `dup` are setting up the classic Unix dance of setting -up file descriptors in advance of performing a `fork`. Since WASI has no `fork`, -these don't apply. - -And avoiding `dup` for now avoids committing to the POSIX concepts of -descriptors being distinct from file descriptions in subtle ways. - -## Why are `path_remove_directory` and `path_unlink_file` separate syscalls? - -In POSIX, there's a single `unlinkat` function, which has a flag word, -and with the `AT_REMOVEDIR` flag one can specify whether one wishes to -remove a file or a directory. However, there really are two distinct -functions being performed here, and having one system call that can -select between two different behaviors doesn't simplify the actual API -compared to just having two system calls. - -More importantly, in WASI, system call imports represent a static list -of the capabilities requested by a wasm module. Therefore, WASI prefers -each system call to do just one thing, so that it's clear what a wasm -module that imports it might be able to do with it. diff --git a/docs/WASI-some-possible-changes.md b/docs/WASI-some-possible-changes.md deleted file mode 100644 index 84cde2a084c6..000000000000 --- a/docs/WASI-some-possible-changes.md +++ /dev/null @@ -1,114 +0,0 @@ -# Possible changes - -The following are a list of relatively straightforward changes -to WASI core that should be considered. - -## Split file/networking/random/clock from args/environ/exit. - -Currently everything is mixed together in one big "core" module. But we can -split them out to allow minimal configurations that don't support this style -of files and networking. - -## Move higher-level and unused errno codes out of the core API. - -The core API currently defines errno codes such as `EDOM` which are -not used for anything. POSIX requires them to be defined, however -that can be done in the higher-level libraries, rather than in the -WASI core API itself. 
- -## Detecting EOF from read/recv explicitly. - -POSIX's `read` returns 0 if and only if it reaches the end of a file or stream. - -Say you have a read buffer of 1024 bytes, and are reading a file that happens -to be 7 bytes long. The first `read` call will return 7, but unless you happen -to know how big the file is supposed to be, you can't distinguish between -that being all there is, and `read` getting interrupted and returning less -data than you requested. - -Many applications today do an extra `read` when they encounter the end of a -file, to ensure that they get a `read` that returns 0 bytes read, to confirm -that they've reached the end of the file. If `read` instead had a way to -indicate that it had reached the end, this extra call wouldn't be necessary. - -And, `read` on a socket is almost equivalent to `recv` with no flags -- except for -one surprising special case: on a datagram socket, if there's a zero-length -datagram, `read` can't consume it, while `recv` can. This is because `read` can't -indicate that it successfully read 0 bytes, because it has overloaded the meaning -of 0 to indicate eof-of-file. - -So, it would be tidier from multiple perspectives if `read` could indicate -that it had reached the end of a file or stream, independently of how many -bytes it has read. - -## Merging read and recv - -These are very similar, and differ only in subtle ways. It'd make the API -easier to understand if they were unified. - -## Trap instead of returning EFAULT - -POSIX system calls return EFAULT when given invalid pointers, however from an -application perspective, it'd be more natural for them to just segfault. - -## More detailed capability error reporting - -Replace `__WASI_ENOTCAPABLE` with error codes that indicate *which* capabilities -were required but not present. - -## Split `__wasi_path_open` into `__wasi_path_open_file` and `__wasi_path_open_directory`? 
- -We could also split `__WASI_RIGHT_PATH_OPEN` into file vs directory, -(obviating `__WASI_O_DIRECTORY`). - -## Fix the y2556 bug - -In some places, timestamps are measured in nanoseconds since the UNIX epoch, -so our calculations indicate a 64-bit counter will overflow on -Sunday, July 21, 2554, at 11:34:33 pm UTC. - -These timestamps aren't used in that many places, so it wouldn't cost that -much to widen these timestamps. We can either just extend the current type to -128 bits (two i64's in wasm) or move to a `timespec`-like `tv_sec`/`tv_nsec` -pair. - -## Remove `fd_allocate`? - -Darwin doesn't implement `posix_fallocate` (similar to `fd_allocate`), despite it being -in POSIX since 2001. So we don't currently know any way to implement `fd_allocate` -on Darwin that's safe from race conditions. Should we remove it from the API? - -## Redesign `fstflags_t` - -The relationship between `*_SET_*TIM` and `*_SET_*TIM_NOW` is non-obvious. -We should look at this again. - -## readdir - -Truncating entries that don't fit into a buffer may be error-prone. Should -we redesign how directory reading works? - -## symlinks - -Symlinks are fairly UNIX-specific. Should we remove `__wasi_path_symlink` -and `__wasi_path_readlink`? - -Also, symlink resolution doesn't benefit from libpreopen-style path -translation. Should we move symlink resolution into the libpreopen layer -and do it entirely in "userspace"? - -## Remove the `path_len` argument from `__wasi_fd_prestat_dir_name` - -The buffer should be sized to the length returned from `__wasi_fd_prestat_get`, -so it's not necessary to pass the length back into the runtime. - -## Add a `__wasi_path_filestat_set_size` function? - -Along with libc/libpreopen support, this would enable implementing the -POSIX `truncate` function. 
- -## errno values returned by `path_open` - -We should specify the errno value returned when `path_open` is told -to open a directory and `__WASI_LOOKUP_SYMLINK_FOLLOW` isn't set, and -the path refers to a symbolic link. diff --git a/docs/WASI-tutorial.md b/docs/WASI-tutorial.md index 6e9e7a22717c..1c7ff8c7e39c 100644 --- a/docs/WASI-tutorial.md +++ b/docs/WASI-tutorial.md @@ -303,10 +303,10 @@ hello world ``` Or, you can compile the `.wat` WebAssembly text format into the wasm binary format -yourself using the [wabt] command line tools: +yourself using the [wasm-tools] command line tools: ``` -$ wat2wasm demo.wat +$ wasm-tools parse demo.wat -o demo.wasm ``` The created `.wasm` file can now be executed with `wasmtime` directly like so: @@ -316,8 +316,7 @@ $ wasmtime demo.wasm hello world ``` -To run this example within the browser, simply upload the compiled `.wasm` file to -the [WASI browser polyfill]. +To run this example within the browser, use [jco]. -[wabt]: https://github.com/WebAssembly/wabt -[WASI browser polyfill]: https://wasi.dev/polyfill/ +[wasm-tools]: https://github.com/bytecodealliance/wasm-tools +[jco]: https://github.com/bytecodealliance/jco diff --git a/docs/stability-wasm-proposals.md b/docs/stability-wasm-proposals.md index 7b19aa52ce04..4eca76c3e110 100644 --- a/docs/stability-wasm-proposals.md +++ b/docs/stability-wasm-proposals.md @@ -57,14 +57,16 @@ column is below. [^8]: A custom fuzzer exists but this isn't enabled yet for general-purpose fuzzing. 
-## Unsupported proposals - -* [`branch-hinting`] -* [`exception-handling`] -* [`flexible-vectors`] -* [`memory-control`] -* [`stack-switching`] -* [`shared-everything-threads`] +## Unimplemented proposals + +| Proposal | Tracking Issue | +|-------------------------------|----------------| +| [`branch-hinting`] | [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463) | +| [`exception-handling`] | [#3427](https://github.com/bytecodealliance/wasmtime/issues/3427) | +| [`flexible-vectors`] | [#9464](https://github.com/bytecodealliance/wasmtime/issues/9464) | +| [`memory-control`] | [#9467](https://github.com/bytecodealliance/wasmtime/issues/9467) | +| [`stack-switching`] | [#9465](https://github.com/bytecodealliance/wasmtime/issues/9465) | +| [`shared-everything-threads`] | [#9466](https://github.com/bytecodealliance/wasmtime/issues/9466) | [`mutable-globals`]: https://github.com/WebAssembly/mutable-global/blob/master/proposals/mutable-global/Overview.md [`sign-extension-ops`]: https://github.com/WebAssembly/spec/blob/master/proposals/sign-extension-ops/Overview.md diff --git a/fuzz/fuzz_targets/differential.rs b/fuzz/fuzz_targets/differential.rs index 802c8edeb9e8..5cc3fb4b4011 100644 --- a/fuzz/fuzz_targets/differential.rs +++ b/fuzz/fuzz_targets/differential.rs @@ -22,8 +22,8 @@ static SETUP: Once = Once::new(); // - ALLOWED_ENGINES=wasmi,spec cargo +nightly fuzz run ... // - ALLOWED_ENGINES=-v8 cargo +nightly fuzz run ... // - ALLOWED_MODULES=single-inst cargo +nightly fuzz run ... -static ALLOWED_ENGINES: Mutex<Vec<&str>> = Mutex::new(vec![]); -static ALLOWED_MODULES: Mutex<Vec<&str>> = Mutex::new(vec![]); +static ALLOWED_ENGINES: Mutex<Vec<Option<&str>>> = Mutex::new(vec![]); +static ALLOWED_MODULES: Mutex<Vec<Option<&str>>> = Mutex::new(vec![]); // Statistics about what's actually getting executed during fuzzing static STATS: RuntimeStats = RuntimeStats::new(); @@ -73,7 +73,13 @@ fn execute_one(data: &[u8]) -> Result<()> { // Choose an engine that Wasmtime will be differentially executed against.
// The chosen engine is then created, which might update `config`, and // returned as a trait object. - let lhs = u.choose(&allowed_engines)?; + let lhs = match *u.choose(&allowed_engines)? { + Some(engine) => engine, + None => { + log::debug!("test case uses a runtime-disabled engine"); + return Ok(()); + } + }; let mut lhs = match engine::build(&mut u, lhs, &mut config)? { Some(engine) => engine, // The chosen engine does not have support compiled into the fuzzer, @@ -100,8 +106,12 @@ fn execute_one(data: &[u8]) -> Result<()> { panic!("unable to generate a module to fuzz against; check `ALLOWED_MODULES`") } let wasm = match *u.choose(&allowed_modules)? { - "wasm-smith" => build_wasm_smith_module(&mut u, &config)?, - "single-inst" => build_single_inst_module(&mut u, &config)?, + Some("wasm-smith") => build_wasm_smith_module(&mut u, &config)?, + Some("single-inst") => build_single_inst_module(&mut u, &config)?, + None => { + log::debug!("test case uses a runtime-disabled module strategy"); + return Ok(()); + } _ => unreachable!(), }; diff --git a/pulley/src/interp/interp_loop.rs b/pulley/src/interp/interp_loop.rs index c17cba3cac56..feb1612779fb 100644 --- a/pulley/src/interp/interp_loop.rs +++ b/pulley/src/interp/interp_loop.rs @@ -34,7 +34,7 @@ pub fn interpreter_loop(vm: &mut Vm, bytecode: &mut UnsafeBytecodeStream) -> Don /// when compiling without `#![feature(explicit_tail_calls)]` enabled (via /// `--cfg pulley_tail_calls`). /// -/// It seems rustc first parses the the function, encounters `become` and emits +/// It seems rustc first parses the function, encounters `become` and emits /// an error about using an unstable keyword on a stable compiler, then applies /// `#[cfg(...)` after parsing to disable the function. 
/// diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index cf8417bd6b60..211c06e32a4b 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -2274,6 +2274,12 @@ who = "Andrew Brown " criteria = "safe-to-deploy" delta = "0.6.0 -> 0.7.2" +[[audits.openvino]] +who = "Andrew Brown " +criteria = "safe-to-deploy" +delta = "0.7.2 -> 0.8.0" +notes = "No new unsafe functionality, just brings in openvino-sys changes and other minor improvements." + [[audits.openvino-finder]] who = "Matthew Tamayo-Rios " criteria = "safe-to-deploy" @@ -2297,6 +2303,12 @@ who = "Andrew Brown " criteria = "safe-to-deploy" delta = "0.6.0 -> 0.7.2" +[[audits.openvino-finder]] +who = "Andrew Brown " +criteria = "safe-to-deploy" +delta = "0.7.2 -> 0.8.0" +notes = "No logic changes in version bump." + [[audits.openvino-sys]] who = "Matthew Tamayo-Rios " criteria = "safe-to-deploy" @@ -2320,6 +2332,12 @@ who = "Andrew Brown " criteria = "safe-to-deploy" delta = "0.6.0 -> 0.7.2" +[[audits.openvino-sys]] +who = "Andrew Brown " +criteria = "safe-to-deploy" +delta = "0.7.2 -> 0.8.0" +notes = "This diff simply re-generates slightly newer C headers with a slightly newer version of bindgen." + [[audits.ort]] who = "Andrew Brown " criteria = "safe-to-deploy" @@ -2650,6 +2668,12 @@ Most of the rest of the changes are adding some new unstable features which aren't enabled by default. """ +[[audits.smallvec]] +who = "Alex Crichton " +criteria = "safe-to-deploy" +delta = "1.11.0 -> 1.13.2" +notes = "Mostly minor updates, the one semi-substantial update looks good." 
+ [[audits.socket2]] who = "Alex Crichton " criteria = "safe-to-deploy" diff --git a/supply-chain/imports.lock b/supply-chain/imports.lock index a833a27ea233..30b9fe68a6f8 100644 --- a/supply-chain/imports.lock +++ b/supply-chain/imports.lock @@ -1513,6 +1513,13 @@ user-id = 2915 user-login = "Amanieu" user-name = "Amanieu d'Antras" +[[publisher.libm]] +version = "0.2.8" +when = "2023-10-06" +user-id = 2915 +user-login = "Amanieu" +user-name = "Amanieu d'Antras" + [[publisher.linux-raw-sys]] version = "0.3.8" when = "2023-05-19" diff --git a/tests/spec_testsuite b/tests/spec_testsuite index ae5a66933070..cbde6d5f26ba 160000 --- a/tests/spec_testsuite +++ b/tests/spec_testsuite @@ -1 +1 @@ -Subproject commit ae5a66933070b705dde56c2a71bf3fbc33282864 +Subproject commit cbde6d5f26ba12d4f455b65bd0648cdba4d95f15 diff --git a/tests/wast.rs b/tests/wast.rs index b8675f846e01..021c6f2cb782 100644 --- a/tests/wast.rs +++ b/tests/wast.rs @@ -224,6 +224,7 @@ fn should_fail(test: &Path, strategy: Strategy) -> bool { "throw_ref.wast", "try_table.wast", "tag.wast", + "instance.wast", ] .iter() .any(|i| test.ends_with(i))