diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle
index bc6771d35fa9..5630a97a5faa 100644
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -1658,6 +1658,10 @@
 (rule (x64_movdqu from)
       (xmm_unary_rm_r (SseOpcode.Movdqu) from))

+(decl x64_movapd (XmmMem) Xmm)
+(rule (x64_movapd src)
+      (xmm_unary_rm_r (SseOpcode.Movapd) src))
+
 (decl x64_pmovsxbw (XmmMem) Xmm)
 (rule (x64_pmovsxbw from)
       (xmm_unary_rm_r (SseOpcode.Pmovsxbw) from))
@@ -2272,6 +2276,11 @@
 (rule (x64_punpcklwd src1 src2)
       (xmm_rm_r $I16X8 (SseOpcode.Punpcklwd) src1 src2))

+;; Helper for creating `unpcklps` instructions.
+(decl x64_unpcklps (Xmm XmmMem) Xmm)
+(rule (x64_unpcklps src1 src2)
+      (xmm_rm_r $I16X8 (SseOpcode.Unpcklps) src1 src2))
+
 ;; Helper for creating `andnps` instructions.
 (decl x64_andnps (Xmm XmmMem) Xmm)
 (rule (x64_andnps src1 src2)
@@ -2624,6 +2633,11 @@
         (_ Unit (emit (MInst.XmmUnaryRmREvex op src dst))))
     dst))

+;; Helper for creating `vcvtudq2ps` instructions.
+(decl x64_vcvtudq2ps (XmmMem) Xmm)
+(rule (x64_vcvtudq2ps src)
+      (xmm_unary_rm_r_evex (Avx512Opcode.Vcvtudq2ps) src))
+
 ;; Helper for creating `vpabsq` instructions.
 (decl x64_vpabsq (XmmMem) Xmm)
 (rule (x64_vpabsq src)
@@ -3014,6 +3028,23 @@
         (_ Unit (emit (MInst.GprToXmm (SseOpcode.Cvtsi2sd) x dst size))))
     dst))

+(decl cvt_u64_to_float_seq (Type Gpr) Xmm)
+(rule (cvt_u64_to_float_seq ty src)
+      (let ((size OperandSize (raw_operand_size_of_type ty))
+            (src_copy WritableGpr (temp_writable_gpr))
+            (dst WritableXmm (temp_writable_xmm))
+            (tmp_gpr1 WritableGpr (temp_writable_gpr))
+            (tmp_gpr2 WritableGpr (temp_writable_gpr))
+            (_ Unit (emit (gen_move $I64 src_copy src)))
+            (_ Unit (emit (MInst.CvtUint64ToFloatSeq size src_copy dst tmp_gpr1 tmp_gpr2))))
+        dst))
+
+(decl fcvt_uint_mask_const () VCodeConstant)
+(extern constructor fcvt_uint_mask_const fcvt_uint_mask_const)
+
+(decl fcvt_uint_mask_high_const () VCodeConstant)
+(extern constructor fcvt_uint_mask_high_const fcvt_uint_mask_high_const)
+
 ;; Helpers for creating `pcmpeq*` instructions.
 (decl x64_pcmpeq (Type Xmm XmmMem) Xmm)
 (rule (x64_pcmpeq $I8X16 x y) (x64_pcmpeqb x y))
diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
index 088c0bd15f39..c8e44d3758e4 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -26,6 +26,16 @@ impl Inst {
             dst: WritableGpr::from_writable_reg(src).unwrap(),
         }
     }
+
+    fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable<Reg>) -> Inst {
+        src.assert_regclass_is(RegClass::Float);
+        debug_assert!(dst.to_reg().class() == RegClass::Float);
+        Inst::XmmUnaryRmREvex {
+            op,
+            src: XmmMem::new(src).unwrap(),
+            dst: WritableXmm::from_writable_reg(dst).unwrap(),
+        }
+    }
 }

 #[test]
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index 4b2a02a02c01..267d664bed2e 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -307,16 +307,6 @@ impl Inst {
         }
     }

-    pub(crate) fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable<Reg>) -> Inst {
-        src.assert_regclass_is(RegClass::Float);
-        debug_assert!(dst.to_reg().class() == RegClass::Float);
-        Inst::XmmUnaryRmREvex {
-            op,
-            src: XmmMem::new(src).unwrap(),
-            dst: WritableXmm::from_writable_reg(dst).unwrap(),
-        }
-    }
-
     pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
         src.assert_regclass_is(RegClass::Float);
         debug_assert!(dst.to_reg().class() == RegClass::Float);
@@ -417,27 +407,6 @@
         Inst::XmmCmpRmR { op, src, dst }
     }

-    pub(crate) fn cvt_u64_to_float_seq(
-        dst_size: OperandSize,
-        src: Writable<Reg>,
-        tmp_gpr1: Writable<Reg>,
-        tmp_gpr2: Writable<Reg>,
-        dst: Writable<Reg>,
-    ) -> Inst {
-        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
-        debug_assert!(src.to_reg().class() == RegClass::Int);
-        debug_assert!(tmp_gpr1.to_reg().class() == RegClass::Int);
-        debug_assert!(tmp_gpr2.to_reg().class() == RegClass::Int);
-        debug_assert!(dst.to_reg().class() == RegClass::Float);
-        Inst::CvtUint64ToFloatSeq {
-            src: WritableGpr::from_writable_reg(src).unwrap(),
-            dst: WritableXmm::from_writable_reg(dst).unwrap(),
-            tmp_gpr1: WritableGpr::from_writable_reg(tmp_gpr1).unwrap(),
-            tmp_gpr2: WritableGpr::from_writable_reg(tmp_gpr2).unwrap(),
-            dst_size,
-        }
-    }
-
     pub(crate) fn cvt_float_to_sint_seq(
         src_size: OperandSize,
         dst_size: OperandSize,
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index 01915b35a900..e6fd35c45a82 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -3013,3 +3013,76 @@

 (rule (lower (fcvt_low_from_sint a @ (value_type ty)))
       (x64_cvtdq2pd ty a))
+
+;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (fcvt_from_uint val @ (value_type (fits_in_32 (ty_int ty))))))
+      (x64_cvtsi2ss $I64 (extend_to_gpr val $I64 (ExtendKind.Zero))))
+
+(rule (lower (has_type $F64 (fcvt_from_uint val @ (value_type (fits_in_32 (ty_int ty))))))
+      (x64_cvtsi2sd $I64 (extend_to_gpr val $I64 (ExtendKind.Zero))))
+
+(rule (lower (has_type ty (fcvt_from_uint val @ (value_type $I64))))
+      (cvt_u64_to_float_seq ty val))
+
+;; The algorithm uses `unpcklps` to help create a float that is equivalent to
+;; 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent
+;; every value of the mantissa represents a corresponding uint32 number.
+;; When we subtract 0x1.0p52 we are left with double(src).
+(rule (lower (has_type $F64X2 (fcvt_from_uint (uwiden_low val @ (value_type $I32X4)))))
+      (let ((uint_mask Xmm (x64_xmm_load_const $I32X4 (fcvt_uint_mask_const)))
+            (res Xmm (x64_unpcklps val uint_mask))
+            (uint_mask_high Xmm (x64_xmm_load_const $I32X4 (fcvt_uint_mask_high_const))))
+        (x64_subpd res uint_mask_high)))
+
+;; When AVX512VL and AVX512F are available,
+;; `fcvt_from_uint` can be lowered to a single instruction.
+;;
+;; NOTE: the priority of 1 here is to break ties with the next case for $F32X4,
+;; as it doesn't require either of the AVX-512 extensions to be enabled.
+(rule 1 (lower (has_type (and (avx512vl_enabled) (avx512f_enabled) $F32X4)
+                         (fcvt_from_uint src)))
+      (x64_vcvtudq2ps src))
+
+;; Converting packed unsigned integers to packed floats
+;; requires a few steps. There is no single-instruction
+;; lowering for converting unsigned integers, but there is one for
+;; converting packed signed integers to floats (cvtdq2ps). In
+;; the steps below we isolate the upper half (16 bits) and
+;; lower half (16 bits) of each lane and then convert
+;; each half separately using cvtdq2ps, which is meant for signed
+;; integers. For this to work on the upper half we must
+;; first shift those bits right by 1 (divide by 2) to ensure
+;; the most significant bit is 0 and the value is not treated
+;; as signed, and then double the value after the conversion.
+;; Finally we add the two converted halves, where the addition
+;; rounds correctly.
+;;
+;; Sequence:
+;; -> A = 0xffffffff
+;; -> Ah = 0xffff0000
+;; -> Al = 0x0000ffff
+;; -> Convert(Al) // Convert int to float
+;; -> Ah = Ah >> 1 // Shift right 1 to ensure Ah conversion isn't treated as signed
+;; -> Convert(Ah) // Convert .. with no loss of significant digits from previous shift
+;; -> Ah = Ah + Ah // Double Ah to account for the shift right before the conversion.
+;; -> dst = Ah + Al // Add the two floats together
+(rule (lower (has_type $F32X4 (fcvt_from_uint a)))
+      (let (;; get the low 16 bits
+            (a_lo Xmm (x64_pslld a (RegMemImm.Imm 16)))
+            (a_lo Xmm (x64_psrld a_lo (RegMemImm.Imm 16)))
+
+            ;; get the high 16 bits
+            (a_hi Xmm (x64_psubd a a_lo))
+
+            ;; convert the low 16 bits
+            (a_lo Xmm (x64_cvtdq2ps a_lo))
+
+            ;; shift the high bits by 1, convert, and double to get the correct
+            ;; value
+            (a_hi Xmm (x64_psrld a_hi (RegMemImm.Imm 1)))
+            (a_hi Xmm (x64_cvtdq2ps a_hi))
+            (a_hi Xmm (x64_addps a_hi a_hi)))
+
+        ;; add together the two converted values
+        (x64_addps a_hi a_lo)))
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index ebf2eca43572..0ad745c17b8f 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -166,57 +166,6 @@ fn input_to_reg_mem<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> RegM
     )
 }

-/// An extension specification for `extend_input_to_reg`.
-#[derive(Clone, Copy)]
-enum ExtSpec {
-    #[allow(dead_code)]
-    ZeroExtendTo32,
-    ZeroExtendTo64,
-    #[allow(dead_code)]
-    SignExtendTo32,
-    #[allow(dead_code)] // not used just yet but may be used in the future!
-    SignExtendTo64,
-}
-
-/// Put the given input into a register, marking it as used, and do a zero- or signed- extension if
-/// required. (This obviously causes side-effects.)
-fn extend_input_to_reg<C: LowerCtx<I = Inst>>(
-    ctx: &mut C,
-    spec: InsnInput,
-    ext_spec: ExtSpec,
-) -> Reg {
-    let requested_size = match ext_spec {
-        ExtSpec::ZeroExtendTo32 | ExtSpec::SignExtendTo32 => 32,
-        ExtSpec::ZeroExtendTo64 | ExtSpec::SignExtendTo64 => 64,
-    };
-    let input_size = ctx.input_ty(spec.insn, spec.input).bits();
-
-    let requested_ty = if requested_size == 32 {
-        types::I32
-    } else {
-        types::I64
-    };
-
-    let ext_mode = match (input_size, requested_size) {
-        (a, b) if a == b => return put_input_in_reg(ctx, spec),
-        (1, 8) => return put_input_in_reg(ctx, spec),
-        (a, b) => ExtMode::new(a.try_into().unwrap(), b.try_into().unwrap())
-            .unwrap_or_else(|| panic!("invalid extension: {} -> {}", a, b)),
-    };
-
-    let src = input_to_reg_mem(ctx, spec);
-    let dst = ctx.alloc_tmp(requested_ty).only_reg().unwrap();
-    match ext_spec {
-        ExtSpec::ZeroExtendTo32 | ExtSpec::ZeroExtendTo64 => {
-            ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst))
-        }
-        ExtSpec::SignExtendTo32 | ExtSpec::SignExtendTo64 => {
-            ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst))
-        }
-    }
-    dst.to_reg()
-}
-
 fn input_to_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> Option<u64> {
     ctx.get_input_as_source_or_const(spec.insn, spec.input)
         .constant
@@ -629,207 +578,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::Selectif
         | Opcode::SelectifSpectreGuard
         | Opcode::FcvtFromSint
-        | Opcode::FcvtLowFromSint => {
+        | Opcode::FcvtLowFromSint
+        | Opcode::FcvtFromUint => {
             implemented_in_isle(ctx);
         }

-        Opcode::FcvtFromUint => {
-            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let ty = ty.unwrap();
-            let input_ty = ctx.input_ty(insn, 0);
-            let output_ty = ctx.output_ty(insn, 0);
-
-            if !ty.is_vector() {
-                match input_ty {
-                    types::I8 | types::I16 | types::I32 => {
-                        // Conversion from an unsigned int smaller than 64-bit is easy: zero-extend +
-                        // do a signed conversion (which won't overflow).
-                        let opcode = if ty == types::F32 {
-                            SseOpcode::Cvtsi2ss
-                        } else {
-                            assert_eq!(ty, types::F64);
-                            SseOpcode::Cvtsi2sd
-                        };
-
-                        let src = RegMem::reg(extend_input_to_reg(
-                            ctx,
-                            inputs[0],
-                            ExtSpec::ZeroExtendTo64,
-                        ));
-                        ctx.emit(Inst::gpr_to_xmm(opcode, src, OperandSize::Size64, dst));
-                    }
-
-                    types::I64 => {
-                        let src = put_input_in_reg(ctx, inputs[0]);
-
-                        let src_copy = ctx.alloc_tmp(types::I64).only_reg().unwrap();
-                        ctx.emit(Inst::gen_move(src_copy, src, types::I64));
-
-                        let tmp_gpr1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
-                        let tmp_gpr2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
-                        ctx.emit(Inst::cvt_u64_to_float_seq(
-                            if ty == types::F64 {
-                                OperandSize::Size64
-                            } else {
-                                OperandSize::Size32
-                            },
-                            src_copy,
-                            tmp_gpr1,
-                            tmp_gpr2,
-                            dst,
-                        ));
-                    }
-                    _ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
-                };
-            } else if output_ty == types::F64X2 {
-                if let Some(uwiden) = matches_input(ctx, inputs[0], Opcode::UwidenLow) {
-                    let uwiden_input = InsnInput {
-                        insn: uwiden,
-                        input: 0,
-                    };
-                    let src = put_input_in_reg(ctx, uwiden_input);
-                    let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                    let input_ty = ctx.input_ty(uwiden, 0);
-
-                    // Matches_input further obfuscates which Wasm instruction this is ultimately
-                    // lowering. Check here that the types are as expected for F64x2ConvertLowI32x4U.
-                    debug_assert!(input_ty == types::I32X4);
-
-                    // Algorithm uses unpcklps to help create a float that is equivalent
-                    // 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent
-                    // every value of the mantissa represents a corresponding uint32 number.
-                    // When we subtract 0x1.0p52 we are left with double(src).
-                    let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
-                    ctx.emit(Inst::gen_move(dst, src, types::I32X4));
-
-                    static UINT_MASK: [u8; 16] = [
-                        0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00,
-                        0x00, 0x00, 0x00, 0x00,
-                    ];
-
-                    let uint_mask_const =
-                        ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));
-
-                    ctx.emit(Inst::xmm_load_const(
-                        uint_mask_const,
-                        uint_mask,
-                        types::I32X4,
-                    ));
-
-                    // Creates 0x1.0p52 + double(src)
-                    ctx.emit(Inst::xmm_rm_r(
-                        SseOpcode::Unpcklps,
-                        RegMem::from(uint_mask),
-                        dst,
-                    ));
-
-                    static UINT_MASK_HIGH: [u8; 16] = [
-                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00,
-                        0x00, 0x00, 0x30, 0x43,
-                    ];
-
-                    let uint_mask_high_const =
-                        ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH));
-                    let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
-                    ctx.emit(Inst::xmm_load_const(
-                        uint_mask_high_const,
-                        uint_mask_high,
-                        types::I32X4,
-                    ));
-
-                    // 0x1.0p52 + double(src) - 0x1.0p52
-                    ctx.emit(Inst::xmm_rm_r(
-                        SseOpcode::Subpd,
-                        RegMem::from(uint_mask_high),
-                        dst,
-                    ));
-                } else {
-                    panic!("Unsupported FcvtFromUint conversion types: {}", ty);
-                }
-            } else {
-                assert_eq!(ctx.input_ty(insn, 0), types::I32X4);
-                let src = put_input_in_reg(ctx, inputs[0]);
-                let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-
-                if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512f_simd() {
-                    // When AVX512VL and AVX512F are available,
-                    // `fcvt_from_uint` can be lowered to a single instruction.
-                    ctx.emit(Inst::xmm_unary_rm_r_evex(
-                        Avx512Opcode::Vcvtudq2ps,
-                        RegMem::reg(src),
-                        dst,
-                    ));
-                } else {
-                    // Converting packed unsigned integers to packed floats
-                    // requires a few steps. There is no single instruction
-                    // lowering for converting unsigned floats but there is for
-                    // converting packed signed integers to float (cvtdq2ps). In
-                    // the steps below we isolate the upper half (16 bits) and
-                    // lower half (16 bits) of each lane and then we convert
-                    // each half separately using cvtdq2ps meant for signed
-                    // integers. In order for this to work for the upper half
-                    // bits we must shift right by 1 (divide by 2) these bits in
-                    // order to ensure the most significant bit is 0 not signed,
-                    // and then after the conversion we double the value.
-                    // Finally we add the converted values where addition will
-                    // correctly round.
-                    //
-                    // Sequence:
-                    // -> A = 0xffffffff
-                    // -> Ah = 0xffff0000
-                    // -> Al = 0x0000ffff
-                    // -> Convert(Al) // Convert int to float
-                    // -> Ah = Ah >> 1 // Shift right 1 to assure Ah conversion isn't treated as signed
-                    // -> Convert(Ah) // Convert .. with no loss of significant digits from previous shift
-                    // -> Ah = Ah + Ah // Double Ah to account for shift right before the conversion.
-                    // -> dst = Ah + Al // Add the two floats together
-
-                    // Create a temporary register
-                    let tmp = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
-                    ctx.emit(Inst::xmm_unary_rm_r(
-                        SseOpcode::Movapd,
-                        RegMem::reg(src),
-                        tmp,
-                    ));
-                    ctx.emit(Inst::gen_move(dst, src, ty));
-
-                    // Get the low 16 bits
-                    ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(16), tmp));
-                    ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(16), tmp));
-
-                    // Get the high 16 bits
-                    ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::from(tmp), dst));
-
-                    // Convert the low 16 bits
-                    ctx.emit(Inst::xmm_unary_rm_r(
-                        SseOpcode::Cvtdq2ps,
-                        RegMem::from(tmp),
-                        tmp,
-                    ));
-
-                    // Shift the high bits by 1, convert, and double to get the correct value.
-                    ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(1), dst));
-                    ctx.emit(Inst::xmm_unary_rm_r(
-                        SseOpcode::Cvtdq2ps,
-                        RegMem::from(dst),
-                        dst,
-                    ));
-                    ctx.emit(Inst::xmm_rm_r(
-                        SseOpcode::Addps,
-                        RegMem::reg(dst.to_reg()),
-                        dst,
-                    ));
-
-                    // Add together the two converted values.
-                    ctx.emit(Inst::xmm_rm_r(
-                        SseOpcode::Addps,
-                        RegMem::reg(tmp.to_reg()),
-                        dst,
-                    ));
-                }
-            }
-        }
-
         Opcode::FcvtToUint | Opcode::FcvtToUintSat | Opcode::FcvtToSint | Opcode::FcvtToSintSat => {
             let src = put_input_in_reg(ctx, inputs[0]);
             let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs
index fb9abb6319b6..8fffd3857ffe 100644
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -770,6 +770,18 @@ where
     fn jump_table_size(&mut self, targets: &BoxVecMachLabel) -> u32 {
         targets.len() as u32
     }
+
+    #[inline]
+    fn fcvt_uint_mask_const(&mut self) -> VCodeConstant {
+        self.lower_ctx
+            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK))
+    }
+
+    #[inline]
+    fn fcvt_uint_mask_high_const(&mut self) -> VCodeConstant {
+        self.lower_ctx
+            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH))
+    }
 }

 impl<C> IsleContext<'_, C, Flags, IsaFlags, 6>
@@ -891,3 +903,11 @@ fn to_simm32(constant: i64) -> Option<GprMemImm> {
         None
     }
 }
+
+const UINT_MASK: [u8; 16] = [
+    0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+];
+
+const UINT_MASK_HIGH: [u8; 16] = [
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43,
+];
diff --git a/cranelift/filetests/filetests/isa/x64/fcvt-simd.clif b/cranelift/filetests/filetests/isa/x64/fcvt-simd.clif
new file mode 100644
index 000000000000..8ac1b0d94c31
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/fcvt-simd.clif
@@ -0,0 +1,18 @@
+test compile precise-output
+set enable_simd
+target x86_64 has_avx512vl has_avx512f
+
+function %f1(i32x4) -> f32x4 {
+block0(v0: i32x4):
+    v1 = fcvt_from_uint.f32x4 v0
+    return v1
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; vcvtudq2ps %xmm0, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif
index 65d257050d35..09d7c80336eb 100644
--- a/cranelift/filetests/filetests/isa/x64/fcvt.clif
+++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif
@@ -131,3 +131,72 @@ block0(v0: i32x4):
 ; popq %rbp
 ; ret

+function %f10(i8, i16, i32, i64) -> f32 {
+block0(v0: i8, v1: i16, v2: i32, v3: i64):
+    v4 = fcvt_from_uint.f32 v0
+    v5 = fcvt_from_uint.f32 v1
+    v6 = fcvt_from_uint.f32 v2
+    v7 = fcvt_from_uint.f32 v3
+    v8 = fadd.f32 v4, v5
+    v9 = fadd.f32 v8, v6
+    v10 = fadd.f32 v9, v7
+    return v10
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movzbq %dil, %rax
+; cvtsi2ss %rax, %xmm0
+; movzwq %si, %rax
+; cvtsi2ss %rax, %xmm6
+; movl %edx, %eax
+; cvtsi2ss %rax, %xmm7
+; u64_to_f32_seq %rcx, %xmm4, %r8, %rdx
+; addss %xmm0, %xmm6, %xmm0
+; addss %xmm0, %xmm7, %xmm0
+; addss %xmm0, %xmm4, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
+function %f11(i32x4) -> f64x2 {
+block0(v0: i32x4):
+    v1 = uwiden_low v0
+    v2 = fcvt_from_uint.f64x2 v1
+    return v2
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; load_const VCodeConstant(0), %xmm3
+; unpcklps %xmm0, %xmm3, %xmm0
+; load_const VCodeConstant(1), %xmm7
+; subpd %xmm0, %xmm7, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
+function %f12(i32x4) -> f32x4 {
+block0(v0: i32x4):
+    v1 = fcvt_from_uint.f32x4 v0
+    return v1
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movdqa %xmm0, %xmm4
+; pslld %xmm4, $16, %xmm4
+; psrld %xmm4, $16, %xmm4
+; psubd %xmm0, %xmm4, %xmm0
+; cvtdq2ps %xmm4, %xmm9
+; psrld %xmm0, $1, %xmm0
+; cvtdq2ps %xmm0, %xmm0
+; addps %xmm0, %xmm0, %xmm0
+; addps %xmm0, %xmm9, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
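Note on the scalar `fits_in_32` rules in lower.isle: they rely on the fact that
a zero-extended u8/u16/u32 always lands in the non-negative range of an i64, so
the signed cvtsi2ss/cvtsi2sd conversion can neither overflow nor flip the sign.
A minimal Rust sketch of that identity (a standalone illustration, not part of
the patch; the helper name is made up):

    /// Zero-extend to 64 bits, then reuse a signed conversion: every u32 is a
    /// non-negative i64, so the signed conversion agrees with the unsigned one.
    fn u32_to_f32_via_signed(x: u32) -> f32 {
        let wide = x as u64 as i64; // zero-extension; always >= 0
        wide as f32
    }

For example, `u32_to_f32_via_signed(u32::MAX)` equals `u32::MAX as f32`.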
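The `$F64X2` rule implements the 0x1.0p52 trick: `unpcklps` interleaves each
source lane with the constant word 0x43300000, which builds the double
0x1.0p52 + src per lane, and `subpd` then removes the 0x1.0p52 bias. A scalar
Rust sketch of why this yields exactly double(src) (a standalone illustration,
not part of the patch; the function name is made up):

    fn u32_to_f64_via_p52(x: u32) -> f64 {
        // 0x4330_0000_0000_0000 is the bit pattern of 0x1.0p52 (2^52). At this
        // exponent one mantissa ULP is worth exactly 1.0, so OR-ing a u32 into
        // the low mantissa bits produces the double 2^52 + x with no rounding.
        const P52_BITS: u64 = 0x4330_0000_0000_0000;
        let biased = f64::from_bits(P52_BITS | x as u64);
        // The subtraction is also exact, because x itself is representable.
        biased - f64::from_bits(P52_BITS)
    }

For any `x: u32`, `u32_to_f64_via_p52(x)` equals `x as f64`, including the
extremes `0` and `u32::MAX`.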
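The non-AVX-512 `$F32X4` fallback can be modeled one lane at a time in scalar
Rust; the comments map each scalar step to the packed instruction it stands in
for (a standalone illustration, not part of the patch):

    fn u32_to_f32_via_halves(a: u32) -> f32 {
        let a_lo = a & 0xffff; // pslld 16 + psrld 16: keep the low 16 bits
        let a_hi = a - a_lo; // psubd: the high 16 bits, still in place
        let lo_f = a_lo as i32 as f32; // cvtdq2ps: exact, value < 2^16
        // psrld 1: only a zero bit is shifted out (the low 16 bits of a_hi are
        // zero), and the sign bit becomes 0 so cvtdq2ps treats it as positive.
        let hi_f = (a_hi >> 1) as i32 as f32;
        let hi_f = hi_f + hi_f; // addps with itself: undo the halving, exact
        hi_f + lo_f // the final addps is the only step that rounds
    }

    fn main() {
        for a in [0u32, 1, 0xffff, 0x1_0000, 0xdead_beef, u32::MAX] {
            assert_eq!(u32_to_f32_via_halves(a), a as f32);
        }
    }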