diff --git a/build.rs b/build.rs
index 3c506a4ebecf..fc633e18c2ad 100644
--- a/build.rs
+++ b/build.rs
@@ -233,11 +233,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
                 "cvt_from_uint",
                 "issue_3327_bnot_lowering",
                 "simd_conversions",
-                "simd_f32x4",
-                "simd_f32x4_pmin_pmax",
                 "simd_f32x4_rounding",
-                "simd_f64x2",
-                "simd_f64x2_pmin_pmax",
                 "simd_f64x2_rounding",
                 "simd_i32x4_trunc_sat_f32x4",
                 "simd_i32x4_trunc_sat_f64x2",
diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle
index ec70bbe361d9..e4ab3354e06f 100644
--- a/cranelift/codegen/src/isa/riscv64/inst.isle
+++ b/cranelift/codegen/src/isa/riscv64/inst.isle
@@ -1610,6 +1610,11 @@
 
 ;; Float Helpers
 
+;; Returns the bit pattern of the canonical NaN for the given type.
+(decl pure canonical_nan_u64 (Type) u64)
+(rule (canonical_nan_u64 $F32) 0x7fc00000)
+(rule (canonical_nan_u64 $F64) 0x7ff8000000000000)
+
 (decl gen_default_frm () OptionFloatRoundingMode)
 (extern constructor gen_default_frm gen_default_frm)
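The two constants in `canonical_nan_u64` are the quiet-NaN encodings with a zero sign bit, an all-ones exponent, and only the most significant mantissa bit set. A minimal host-side cross-check (a sketch for illustration, not part of the patch):

    fn main() {
        // f32: 1 sign bit, 8 exponent bits (all ones), 23 mantissa bits (MSB set).
        let f32_nan: u32 = (0xff << 23) | (1 << 22);
        assert_eq!(f32_nan, 0x7fc00000);
        assert!(f32::from_bits(f32_nan).is_nan());

        // f64: 1 sign bit, 11 exponent bits (all ones), 52 mantissa bits (MSB set).
        let f64_nan: u64 = (0x7ff << 52) | (1 << 51);
        assert_eq!(f64_nan, 0x7ff8000000000000);
        assert!(f64::from_bits(f64_nan).is_nan());
    }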
diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
index 239c479efbd4..091d2128db17 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
@@ -1744,6 +1744,9 @@ impl Inst {
                 (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => {
                     format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}")
                 }
+                (VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => {
+                    format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}")
+                }
                 (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => {
                     format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}")
                 }
diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs
index 9fc038bade8d..19e4b2216339 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs
@@ -345,9 +345,13 @@ impl VecAluOpRRR {
             | VecAluOpRRR::VsadduVV
             | VecAluOpRRR::VsadduVX => 0b100000,
             VecAluOpRRR::VfrdivVF | VecAluOpRRR::VsaddVV | VecAluOpRRR::VsaddVX => 0b100001,
+            VecAluOpRRR::VfminVV => 0b000100,
+            VecAluOpRRR::VfmaxVV => 0b000110,
             VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010,
             VecAluOpRRR::VssubVV | VecAluOpRRR::VssubVX => 0b100011,
+            VecAluOpRRR::VfsgnjVV | VecAluOpRRR::VfsgnjVF => 0b001000,
             VecAluOpRRR::VfsgnjnVV => 0b001001,
+            VecAluOpRRR::VfsgnjxVV => 0b001010,
             VecAluOpRRR::VrgatherVV | VecAluOpRRR::VrgatherVX => 0b001100,
             VecAluOpRRR::VwadduVV | VecAluOpRRR::VwadduVX => 0b110000,
             VecAluOpRRR::VwaddVV | VecAluOpRRR::VwaddVX => 0b110001,
@@ -473,7 +477,11 @@ impl VecAluOpRRR {
             | VecAluOpRRR::VfsubVV
             | VecAluOpRRR::VfmulVV
             | VecAluOpRRR::VfdivVV
+            | VecAluOpRRR::VfmaxVV
+            | VecAluOpRRR::VfminVV
+            | VecAluOpRRR::VfsgnjVV
             | VecAluOpRRR::VfsgnjnVV
+            | VecAluOpRRR::VfsgnjxVV
             | VecAluOpRRR::VmfeqVV
             | VecAluOpRRR::VmfneVV
             | VecAluOpRRR::VmfltVV
@@ -485,6 +493,7 @@ impl VecAluOpRRR {
             | VecAluOpRRR::VfdivVF
             | VecAluOpRRR::VfrdivVF
             | VecAluOpRRR::VfmergeVFM
+            | VecAluOpRRR::VfsgnjVF
             | VecAluOpRRR::VmfeqVF
             | VecAluOpRRR::VmfneVF
             | VecAluOpRRR::VmfltVF
diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle
index a51e96cdf4d7..49133a2ae2fb 100644
--- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle
+++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle
@@ -122,7 +122,11 @@
     (VfsubVV)
     (VfmulVV)
     (VfdivVV)
+    (VfminVV)
+    (VfmaxVV)
+    (VfsgnjVV)
     (VfsgnjnVV)
+    (VfsgnjxVV)
     (VmergeVVM)
     (VredmaxuVS)
     (VredminuVS)
@@ -180,6 +184,7 @@
     (VfrsubVF)
     (VfmulVF)
     (VfdivVF)
+    (VfsgnjVF)
     (VfrdivVF)
     (VmergeVXM)
     (VfmergeVFM)
@@ -836,6 +841,27 @@
 (rule (rv_vfrdiv_vf vs2 vs1 mask vstate)
   (vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 mask vstate))
 
+;; Helper for emitting the `vfmin.vv` instruction.
+(decl rv_vfmin_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfmin_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfminVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfmax.vv` instruction.
+(decl rv_vfmax_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfmax_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfmaxVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnj.vv` ("Floating Point Sign Injection") instruction.
+;; The output of this instruction is `vs2` with the sign bit from `vs1`.
+(decl rv_vfsgnj_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnj_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfsgnj.vf` ("Floating Point Sign Injection") instruction.
+(decl rv_vfsgnj_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnj_vf vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjVF) vs2 vs1 mask vstate))
+
 ;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction.
 ;; The output of this instruction is `vs2` with the negated sign bit from `vs1`.
 (decl rv_vfsgnjn_vv (VReg VReg VecOpMasking VState) VReg)
@@ -847,6 +873,18 @@
 (decl rv_vfneg_v (VReg VecOpMasking VState) VReg)
 (rule (rv_vfneg_v vs mask vstate) (rv_vfsgnjn_vv vs vs mask vstate))
 
+;; Helper for emitting the `vfsgnjx.vv` ("Floating Point Sign Injection Exclusive") instruction.
+;; The output of this instruction is `vs2` with the XOR of the sign bits from `vs2` and `vs1`.
+;; When `vs2 == vs1` this implements `fabs`.
+(decl rv_vfsgnjx_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vfsgnjx_vv vs2 vs1 mask vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjxVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vfabs.v` instruction.
+;; This instruction is a mnemonic for `vfsgnjx.vv vd, vs, vs`.
+(decl rv_vfabs_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfabs_v vs mask vstate) (rv_vfsgnjx_vv vs vs mask vstate))
+
 ;; Helper for emitting the `vfsqrt.v` instruction.
 ;; This instruction computes the element-wise square root of the source vector.
 (decl rv_vfsqrt_v (VReg VecOpMasking VState) VReg)
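The sign-injection helpers added above differ only in how the result's sign bit is derived from `vs2` and `vs1`. A lane-wise Rust model of the semantics (a sketch for illustration, not the lowering itself; the helper names are ours):

    const SIGN: u64 = 1 << 63;

    // vfsgnj.vv:  |vs2| with the sign of vs1 (fcopysign).
    fn sgnj(vs2: f64, vs1: f64) -> f64 {
        f64::from_bits((vs2.to_bits() & !SIGN) | (vs1.to_bits() & SIGN))
    }

    // vfsgnjn.vv: |vs2| with the negated sign of vs1 (fneg when vs2 == vs1).
    fn sgnjn(vs2: f64, vs1: f64) -> f64 {
        f64::from_bits((vs2.to_bits() & !SIGN) | (!vs1.to_bits() & SIGN))
    }

    // vfsgnjx.vv: |vs2| with sign(vs2) XOR sign(vs1) (fabs when vs2 == vs1).
    fn sgnjx(vs2: f64, vs1: f64) -> f64 {
        f64::from_bits(vs2.to_bits() ^ (vs1.to_bits() & SIGN))
    }

    fn main() {
        assert_eq!(sgnj(1.5, -2.0), -1.5);  // fcopysign
        assert_eq!(sgnjn(1.5, 1.5), -1.5);  // fneg
        assert_eq!(sgnjx(-1.5, -1.5), 1.5); // fabs
    }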
diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle
index bc46b8279323..a34cd6f321a0 100644
--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -981,9 +981,12 @@
 
 ;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty (fabs x)))
+(rule 0 (lower (has_type (ty_scalar_float ty) (fabs x)))
   (rv_fabs ty x))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fabs x)))
+  (rv_vfabs_v x (unmasked) ty))
+
 ;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 (rule 0 (lower (has_type (ty_scalar_float ty) (fneg x)))
   (rv_fneg ty x))
@@ -992,9 +995,15 @@
   (rv_vfneg_v x (unmasked) ty))
 
 ;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty (fcopysign x y)))
+(rule 0 (lower (has_type (ty_scalar_float ty) (fcopysign x y)))
   (rv_fsgnj ty x y))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x y)))
+  (rv_vfsgnj_vv x y (unmasked) ty))
+
+(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x (splat y))))
+  (rv_vfsgnj_vf x y (unmasked) ty))
+
 ;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 (rule (lower (has_type ty (fma x y z)))
   (rv_fmadd ty x y z))
@@ -1169,24 +1178,60 @@
 (rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fdiv (splat x) y)))
   (rv_vfrdiv_vf y x (unmasked) ty))
 
-;;;; Rules for `fmin/fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule
-  (lower (has_type ty (fmin x y)))
+(rule 0 (lower (has_type (ty_scalar_float ty) (fmin x y)))
   (gen_float_select (FloatSelectOP.Min) x y ty))
 
-(rule
-  (lower (has_type ty (fmin_pseudo x y)))
+;; vfmin does almost the right thing, but it does not handle NaNs correctly.
+;; We should return a NaN if any of the inputs is a NaN, but vfmin returns the
+;; number input instead.
+;;
+;; TODO: We can improve this by using a masked `fmin` instruction that modifies
+;; the canonical NaN register. That way we could avoid the `vmerge.vv` instruction.
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmin x y)))
+  (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y))
+        (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty))))
+        (vec_nan VReg (rv_vmv_vx nan ty))
+        (min VReg (rv_vfmin_vv x y (unmasked) ty)))
+    (rv_vmerge_vvm vec_nan min is_not_nan ty)))
+
+;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule 0 (lower (has_type (ty_scalar_float ty) (fmax x y)))
+  (gen_float_select (FloatSelectOP.Max) x y ty))
+
+;; vfmax does almost the right thing, but it does not handle NaNs correctly.
+;; We should return a NaN if any of the inputs is a NaN, but vfmax returns the
+;; number input instead.
+;;
+;; TODO: We can improve this by using a masked `fmax` instruction that modifies
+;; the canonical NaN register. That way we could avoid the `vmerge.vv` instruction.
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmax x y)))
+  (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y))
+        (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty))))
+        (vec_nan VReg (rv_vmv_vx nan ty))
+        (max VReg (rv_vfmax_vv x y (unmasked) ty)))
+    (rv_vmerge_vvm vec_nan max is_not_nan ty)))
+
+;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule 0 (lower (has_type (ty_scalar_float ty) (fmin_pseudo x y)))
   (gen_float_select_pseudo (FloatSelectOP.Min) x y ty))
 
-(rule
-  (lower (has_type ty (fmax x y)))
-  (gen_float_select (FloatSelectOP.Max) x y ty))
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmin_pseudo x y)))
+  (let ((mask VReg (gen_fcmp_mask ty (FloatCC.LessThan) y x)))
+    (rv_vmerge_vvm x y mask ty)))
 
-(rule
-  (lower (has_type ty (fmax_pseudo x y)))
+;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule 0 (lower (has_type (ty_scalar_float ty) (fmax_pseudo x y)))
   (gen_float_select_pseudo (FloatSelectOP.Max) x y ty))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmax_pseudo x y)))
+  (let ((mask VReg (gen_fcmp_mask ty (FloatCC.LessThan) x y)))
+    (rv_vmerge_vvm x y mask ty)))
+
 ;;;;; Rules for `stack_addr`;;;;;;;;;
 (rule (lower (stack_addr ss offset))
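A lane-wise model of the vector `fmin`/`fmax` rules above, for f64 (a sketch under the same canonical-NaN assumption; `fmax` is symmetric). Note that Rust's `f64::min` is only a stand-in for `vfmin.vv` here, since the NaN lanes it would mishandle are overwritten by the merge:

    fn fmin_lane(x: f64, y: f64) -> f64 {
        // gen_fcmp_mask with FloatCC.Ordered: true iff neither input is NaN.
        let is_not_nan = !x.is_nan() && !y.is_nan();
        // vfmin.vv over the inputs (NaN lanes are corrected below).
        let min = x.min(y);
        // vmerge.vvm: keep `min` where the mask is set, canonical NaN elsewhere.
        if is_not_nan { min } else { f64::from_bits(0x7ff8000000000000) }
    }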
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fabs.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fabs.clif
new file mode 100644
index 000000000000..f41147eb48d8
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fabs.clif
@@ -0,0 +1,83 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+
+function %fabs_f32x4(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = fabs v0
+    return v1
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vfabs.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x02, 0xcd
+; .byte 0x57, 0x92, 0x10, 0x2a
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x02, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %fabs_f64x2(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = fabs v0
+    return v1
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vfabs.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma)
+; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x81, 0xcd
+; .byte 0x57, 0x92, 0x10, 0x2a
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x02, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcopysign.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcopysign.clif
new file mode 100644
index 000000000000..ab2fef1091c2
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcopysign.clif
@@ -0,0 +1,169 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+
+function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4):
+    v2 = fcopysign v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vfsgnj.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma)
+; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x02, 0xcd
+; .byte 0x57, 0x93, 0x11, 0x22
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x03, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %fcopysign_splat_f32x4(f32x4, f32) -> f32x4 {
+block0(v0: f32x4, v1: f32):
+    v2 = splat.f32x4 v1
+    v3 = fcopysign v0, v2
+    return v3
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vfsgnj.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma)
+; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x02, 0xcd
+; .byte 0xd7, 0x52, 0x15, 0x22
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0xa7, 0x02, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %fcopysign_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2):
+    v2 = fcopysign v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vfsgnj.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma)
+; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x81, 0xcd
+; .byte 0x57, 0x93, 0x11, 0x22
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x03, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %fcopysign_splat_f64x2(f64x2, f64) -> f64x2 {
+block0(v0: f64x2, v1: f64):
+    v2 = splat.f64x2 v1
+    v3 = fcopysign v0, v2
+    return v3
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vfsgnj.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma)
+; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x81, 0xcd
+; .byte 0xd7, 0x52, 0x15, 0x22
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0xa7, 0x02, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fmax-pseudo.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fmax-pseudo.clif
new file mode 100644
index 000000000000..c1b3f21cffb4
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fmax-pseudo.clif
@@ -0,0 +1,92 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fmax_pseudo_f32x4(f32x4, f32x4) -> f32x4 {
+block0(v0:f32x4, v1:f32x4):
+    v2 = fmax_pseudo v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmflt.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmerge.vvm v8,v1,v3,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
+; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x02, 0xcd
+; .byte 0x57, 0x90, 0x11, 0x6e
+; .byte 0x57, 0x84, 0x11, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x04, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %fmax_pseudo_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0:f64x2, v1:f64x2):
+    v2 = fmax_pseudo v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmflt.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma)
+; vmerge.vvm v8,v1,v3,v0.t #avl=2, #vtype=(e64, m1, ta, ma)
+; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x81, 0xcd
+; .byte 0x57, 0x90, 0x11, 0x6e
+; .byte 0x57, 0x84, 0x11, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x04, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
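The pseudo variants need no NaN canonicalization: `fmax_pseudo x y` is defined as `x < y ? y : x` (and `fmin_pseudo` as `y < x ? y : x`), so a NaN in `y` makes the comparison false and `x` is returned unchanged. That maps directly onto the `vmflt.vv` mask plus `vmerge.vvm` pair seen in the VCode above; a lane-wise sketch:

    // vmerge.vvm vd,vs2,vs1,v0.t: vd[i] = mask[i] ? vs1[i] : vs2[i].
    fn vmerge(vs2: f64, vs1: f64, mask: bool) -> f64 {
        if mask { vs1 } else { vs2 }
    }

    fn fmax_pseudo(x: f64, y: f64) -> f64 {
        vmerge(x, y, x < y) // comparisons with NaN are false, so x wins
    }

    fn fmin_pseudo(x: f64, y: f64) -> f64 {
        vmerge(x, y, y < x)
    }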
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fmax.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fmax.clif
new file mode 100644
index 000000000000..eef98e78f2f3
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fmax.clif
@@ -0,0 +1,116 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2):
+    v2 = fmax v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmfeq.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma)
+; vmfeq.vv v8,v3,v3 #avl=2, #vtype=(e64, m1, ta, ma)
+; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma)
+; auipc t4,0; ld t4,12(t4); j 12; .8byte 0x7ff8000000000000
+; vmv.v.x v14,t4 #avl=2, #vtype=(e64, m1, ta, ma)
+; vfmax.vv v16,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma)
+; vmerge.vvm v18,v14,v16,v0.t #avl=2, #vtype=(e64, m1, ta, ma)
+; vse8.v v18,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x81, 0xcd
+; .byte 0x57, 0x93, 0x10, 0x62
+; .byte 0x57, 0x94, 0x31, 0x62
+; .byte 0x57, 0x20, 0x64, 0x66
+; auipc t4, 0
+; ld t4, 0xc(t4)
+; j 0xc
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0xf8, 0x7f
+; .byte 0x57, 0xc7, 0x0e, 0x5e
+; .byte 0x57, 0x98, 0x11, 0x1a
+; .byte 0x57, 0x09, 0xe8, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x09, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %fmax_f32x4(f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4):
+    v2 = fmax v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmfeq.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmfeq.vv v8,v3,v3 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma)
+; lui t4,523264
+; vmv.v.x v14,t4 #avl=4, #vtype=(e32, m1, ta, ma)
+; vfmax.vv v16,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmerge.vvm v18,v14,v16,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
+; vse8.v v18,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x02, 0xcd
+; .byte 0x57, 0x93, 0x10, 0x62
+; .byte 0x57, 0x94, 0x31, 0x62
+; .byte 0x57, 0x20, 0x64, 0x66
+; lui t4, 0x7fc00
+; .byte 0x57, 0xc7, 0x0e, 0x5e
+; .byte 0x57, 0x98, 0x11, 0x1a
+; .byte 0x57, 0x09, 0xe8, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x09, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fmin-pseudo.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fmin-pseudo.clif
new file mode 100644
index 000000000000..608ad3767dc9
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fmin-pseudo.clif
@@ -0,0 +1,92 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 {
+block0(v0:f32x4, v1:f32x4):
+    v2 = fmin_pseudo v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmflt.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmerge.vvm v8,v1,v3,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
+; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x02, 0xcd
+; .byte 0x57, 0x90, 0x30, 0x6e
+; .byte 0x57, 0x84, 0x11, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x04, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %fmin_pseudo_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0:f64x2, v1:f64x2):
+    v2 = fmin_pseudo v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmflt.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma)
+; vmerge.vvm v8,v1,v3,v0.t #avl=2, #vtype=(e64, m1, ta, ma)
+; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x81, 0xcd
+; .byte 0x57, 0x90, 0x30, 0x6e
+; .byte 0x57, 0x84, 0x11, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x04, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fmin.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fmin.clif
new file mode 100644
index 000000000000..6a8d643db748
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fmin.clif
@@ -0,0 +1,116 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2):
+    v2 = fmin v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmfeq.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma)
+; vmfeq.vv v8,v3,v3 #avl=2, #vtype=(e64, m1, ta, ma)
+; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma)
+; auipc t4,0; ld t4,12(t4); j 12; .8byte 0x7ff8000000000000
+; vmv.v.x v14,t4 #avl=2, #vtype=(e64, m1, ta, ma)
+; vfmin.vv v16,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma)
+; vmerge.vvm v18,v14,v16,v0.t #avl=2, #vtype=(e64, m1, ta, ma)
+; vse8.v v18,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x81, 0xcd
+; .byte 0x57, 0x93, 0x10, 0x62
+; .byte 0x57, 0x94, 0x31, 0x62
+; .byte 0x57, 0x20, 0x64, 0x66
+; auipc t4, 0
+; ld t4, 0xc(t4)
+; j 0xc
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0xf8, 0x7f
+; .byte 0x57, 0xc7, 0x0e, 0x5e
+; .byte 0x57, 0x98, 0x11, 0x12
+; .byte 0x57, 0x09, 0xe8, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x09, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %fmin_f32x4(f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4):
+    v2 = fmin v0, v1
+    return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmfeq.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmfeq.vv v8,v3,v3 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma)
+; lui t4,523264
+; vmv.v.x v14,t4 #avl=4, #vtype=(e32, m1, ta, ma)
+; vfmin.vv v16,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmerge.vvm v18,v14,v16,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
+; vse8.v v18,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x02, 0xcd
+; .byte 0x57, 0x93, 0x10, 0x62
+; .byte 0x57, 0x94, 0x31, 0x62
+; .byte 0x57, 0x20, 0x64, 0x66
+; lui t4, 0x7fc00
+; .byte 0x57, 0xc7, 0x0e, 0x5e
+; .byte 0x57, 0x98, 0x11, 0x12
+; .byte 0x57, 0x09, 0xe8, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x09, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
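The `.byte` sequences in the disassembly blocks above are vector instructions that the external disassembler does not decode yet. Assuming the RVV 1.0 OPFVV layout (funct6 | vm | vs2 | vs1 | funct3=001 | vd | OP-V opcode), the funct6 values added in vector.rs can be checked against those bytes; a sketch:

    // Assemble a floating-point vector-vector (OPFVV) instruction word.
    fn opfvv(funct6: u32, vm: u32, vs2: u32, vs1: u32, vd: u32) -> [u8; 4] {
        let word = (funct6 << 26)
            | (vm << 25)
            | (vs2 << 20)
            | (vs1 << 15)
            | (0b001 << 12) // funct3 = OPFVV
            | (vd << 7)
            | 0b1010111; // OP-V major opcode
        word.to_le_bytes()
    }

    fn main() {
        // vfmin.vv v16,v1,v3 (funct6 0b000100, unmasked) from simd-fmin.clif:
        assert_eq!(opfvv(0b000100, 1, 1, 3, 16), [0x57, 0x98, 0x11, 0x12]);
        // vfmax.vv v16,v1,v3 (funct6 0b000110) from simd-fmax.clif:
        assert_eq!(opfvv(0b000110, 1, 1, 3, 16), [0x57, 0x98, 0x11, 0x1a]);
    }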
diff --git a/cranelift/filetests/filetests/runtests/simd-fabs.clif b/cranelift/filetests/filetests/runtests/simd-fabs.clif
new file mode 100644
index 000000000000..45d63d728582
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fabs.clif
@@ -0,0 +1,24 @@
+test run
+target aarch64
+target s390x
+target x86_64
+target riscv64 has_v
+
+function %fabs_f32x4(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = fabs v0
+    return v1
+}
+; run: %fabs_f32x4([0x0.5 -0x1.5 0x1.1p10 -0x1.4cccccp0]) == [0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0]
+; run: %fabs_f32x4([0x0.0 -0x0.0 Inf -Inf]) == [0x0.0 0x0.0 Inf Inf]
+; run: %fabs_f32x4([NaN -NaN Inf -Inf]) == [NaN NaN Inf Inf]
+
+function %fabs_f64x2(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = fabs v0
+    return v1
+}
+; run: %fabs_f64x2([0x0.5 -0x1.5]) == [0x0.5 0x1.5]
+; run: %fabs_f64x2([0x0.0 -0x0.0]) == [0x0.0 0x0.0]
+; run: %fabs_f64x2([Inf -Inf]) == [Inf Inf]
+; run: %fabs_f64x2([NaN -NaN]) == [NaN NaN]
diff --git a/cranelift/filetests/filetests/runtests/simd-fcopysign-64bit.clif b/cranelift/filetests/filetests/runtests/simd-fcopysign-64bit.clif
index 253e4e74d6e8..8bdb5f4e5eb0 100644
--- a/cranelift/filetests/filetests/runtests/simd-fcopysign-64bit.clif
+++ b/cranelift/filetests/filetests/runtests/simd-fcopysign-64bit.clif
@@ -1,6 +1,7 @@
 test interpret
 test run
 target aarch64
+target riscv64 has_v
 ; x86_64 and s390x do not support 64-bit vectors in `fcopysign`.
 
 function %fcopysign_f32x2(f32x2, f32x2) -> f32x2 {
diff --git a/cranelift/filetests/filetests/runtests/simd-fcopysign.clif b/cranelift/filetests/filetests/runtests/simd-fcopysign.clif
index 331301038785..ebabcd0c5286 100644
--- a/cranelift/filetests/filetests/runtests/simd-fcopysign.clif
+++ b/cranelift/filetests/filetests/runtests/simd-fcopysign.clif
@@ -2,6 +2,7 @@ test interpret
 test run
 target s390x
 target aarch64
+target riscv64 has_v
 ; x86_64 does not support SIMD fcopysign.
 
 function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 {
diff --git a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-aarch64.clif
index 92ffddeef20f..28dc45aaea31 100644
--- a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-aarch64.clif
@@ -12,6 +12,9 @@ block0(v0: f64x2, v1: f64x2):
 }
 
 ; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN:0x42 0.0]
+; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN]
+; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
+; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
 
 function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
 block0(v0: f64x2, v1: f64x2):
@@ -22,3 +25,4 @@ block0(v0: f64x2, v1: f64x2):
 ; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [-NaN NaN]
 ; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
 ; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN:0x42 0.0]
+; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
diff --git a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-riscv64.clif b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-riscv64.clif
new file mode 100644
index 000000000000..b13ff253c62d
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-riscv64.clif
@@ -0,0 +1,29 @@
+; Test the non-deterministic aspects of the SIMD arithmetic operations.
+; If you change this file, you should most likely update
+; simd-arithmetic-nondeterministic*.clif as well.
+test run
+target riscv64gc has_v
+
+;; With the current implementation on RISC-V we always return a positive canonical NaN
+;; if any input is NaN. This is compatible with the spec but different from the
+;; other architectures.
+
+function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2):
+    v2 = fmax v0, v1
+    return v2
+}
+; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0]
+; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [NaN NaN]
+; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
+; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [NaN 0.0]
+
+function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2):
+    v2 = fmin v0, v1
+    return v2
+}
+; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [NaN NaN]
+; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
+; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0]
+; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [NaN 0.0]
diff --git a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-x86_64.clif b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-x86_64.clif
index c257bcdf008c..abe6c935abc6 100644
--- a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-x86_64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-x86_64.clif
@@ -14,6 +14,9 @@ block0(v0: f64x2, v1: f64x2):
 ; note below how NaNs are quieted but (unlike fmin) retain their sign: this discrepancy is allowed by non-determinism
 ; in the spec, see https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0.
 ; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0]
+; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN]
+; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
+; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
 
 function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
 block0(v0: f64x2, v1: f64x2):
@@ -26,3 +29,4 @@ block0(v0: f64x2, v1: f64x2):
 ; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [-NaN -NaN]
 ; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [-NaN 0.0]
 ; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [-NaN 0.0]
+; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
diff --git a/cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif b/cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif
index 5b29816e7274..06cb0387427b 100644
--- a/cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif
+++ b/cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif
@@ -3,6 +3,7 @@ target aarch64
 target s390x
 target x86_64
 target x86_64 skylake
+target riscv64gc has_v
 
 function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
 block0(v0: f64x2, v1: f64x2):
@@ -12,9 +13,6 @@ block0(v0: f64x2, v1: f64x2):
 ; This operation exhibits non-deterministic behaviour for some input NaN values;
 ; refer to the simd-fmax-fmin-nondeterministic*.clif files for the respective tests.
 ; run: %fmax_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [+0x0.0 0x1.0]
-; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN]
-; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
-; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
 
 function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
 block0(v0: f64x2, v1: f64x2):
@@ -24,4 +22,3 @@ block0(v0: f64x2, v1: f64x2):
 ; This operation exhibits non-deterministic behaviour for some input NaN values;
 ; refer to the simd-fmax-fmin-nondeterministic*.clif files for the respective tests.
 ; run: %fmin_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [-0x0.0 -0x1.0]
-; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif b/cranelift/filetests/filetests/runtests/simd-fmin-max-pseudo.clif
similarity index 98%
rename from cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif
rename to cranelift/filetests/filetests/runtests/simd-fmin-max-pseudo.clif
index df6d5b0fc3d9..5cb46d1ad38d 100644
--- a/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif
+++ b/cranelift/filetests/filetests/runtests/simd-fmin-max-pseudo.clif
@@ -3,6 +3,7 @@ target aarch64
 ; target s390x FIXME: This currently fails under qemu due to a qemu bug
 target x86_64
 target x86_64 skylake
+target riscv64gc has_v
 
 function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 {
 block0(v0:f32x4, v1:f32x4):