From a9611a881a4c33d0f425278a4d2ab89c81be9509 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Mon, 12 Jun 2023 18:09:12 -0500
Subject: [PATCH] cranelift: Remove the `fcvt_low_from_sint` instruction
 (#6565)

* cranelift: Remove the `fcvt_low_from_sint` instruction

This commit removes this instruction since it's a combination of
`swiden_low` plus `fcvt_from_sint`. This was used by the WebAssembly
`f64x2.convert_low_i32x4_s` instruction previously but the corresponding
unsigned variant of the instruction, `f64x2.convert_low_i32x4_u`, used a
`uwiden_low` plus `fcvt_from_uint` combo. To help simplify Cranelift's
instruction set and to make these two instructions mirrors of each other
the Cranelift instruction is removed.

The s390x and AArch64 backend lowering rules for this instruction could
simply be deleted as the previous combination of the `swiden_low` and
`fcvt_from_sint` lowering rules produces the same code. The x64 backend
moved its lowering to a special case of the `fcvt_from_sint` lowering.

* Fix cranelift-fuzzgen build
---
 .../codegen/meta/src/shared/instructions.rs   | 20 -----------
 cranelift/codegen/src/isa/aarch64/lower.isle  |  7 ----
 cranelift/codegen/src/isa/s390x/lower.isle    |  8 -----
 cranelift/codegen/src/isa/x64/lower.isle      |  4 +--
 .../filetests/filetests/isa/aarch64/fcvt.clif | 13 +++----
 .../filetests/filetests/isa/s390x/vec-fp.clif | 26 +++++++-------
 .../filetests/filetests/isa/x64/fcvt.clif     |  5 +--
 .../filetests/isa/x64/float-avx.clif          |  5 +--
 .../filetests/runtests/simd-conversion.clif   |  5 +--
 cranelift/fuzzgen/src/function_generator.rs   | 35 -------------------
 cranelift/interpreter/src/step.rs             | 23 ------------
 cranelift/wasm/src/code_translator.rs         |  3 +-
 12 files changed, 33 insertions(+), 121 deletions(-)

diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 0995f116db52..14e397c16b69 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -3650,26 +3650,6 @@ pub(crate) fn define(
         .operands_out(vec![Operand::new("a", FloatTo)]),
     );
 
-    ig.push(
-        Inst::new(
-            "fcvt_low_from_sint",
-            r#"
-        Converts packed signed 32-bit integers to packed double precision floating point.
-
-        Considering only the low half of the register, each lane in `x` is interpreted as a
-        signed 32-bit integer that is then converted to a double precision float. This
-        instruction differs from fcvt_from_sint in that it converts half the number of lanes
-        which are converted to occupy twice the number of bits. No rounding should be needed
-        for the resulting float.
-
-        The result type will have half the number of vector lanes as the input.
-        "#,
-            &formats.unary,
-        )
-        .operands_in(vec![Operand::new("x", Int)])
-        .operands_out(vec![Operand::new("a", FloatTo)]),
-    );
-
     let WideInt = &TypeVar::new(
         "WideInt",
         "An integer type of width `i16` upwards",
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index 1be6e7259d67..b9a5f836356e 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -2831,13 +2831,6 @@
 (rule (lower (has_type (tls_model (TlsModel.Macho)) (tls_value (symbol_value_data name _ _))))
       (macho_tls_get_addr name))
 
-;;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type $F64X2 (fcvt_low_from_sint val)))
-      (let ((extended Reg (vec_extend (VecExtendOp.Sxtl) val $false (ScalarSize.Size64)))
-            (converted Reg (vec_misc (VecMisc2.Scvtf) extended (VectorSize.Size64x2))))
-        converted))
-
 ;;; Rules for `fvpromote_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (fvpromote_low val))
diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle
index 4368b358bde8..2b286aeeb2e4 100644
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -1584,14 +1584,6 @@
       (fcvt_from_sint_reg $F64X2 (FpuRoundMode.ToNearestTiesToEven) x))
 
 
-;;;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Convert the low half of a $I32X4 to a $F64X2.
-(rule (lower (has_type $F64X2 (fcvt_low_from_sint x @ (value_type $I32X4))))
-      (fcvt_from_sint_reg $F64X2 (FpuRoundMode.ToNearestTiesToEven)
-                          (vec_unpacks_low_lane_order $I32X4 x)))
-
-
 ;;;; Rules for `fcvt_to_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Convert a scalar floating-point value in a register to an unsigned integer.
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index 3d8599c874c7..690826ea18af 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -3253,9 +3253,7 @@
 (rule 0 (lower (fcvt_from_sint a @ (value_type $I32X4)))
       (x64_cvtdq2ps a))
 
-;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (fcvt_low_from_sint a))
+(rule 1 (lower (has_type $F64X2 (fcvt_from_sint (swiden_low a @ (value_type $I32X4)))))
       (x64_cvtdq2pd a))
 
 ;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt.clif b/cranelift/filetests/filetests/isa/aarch64/fcvt.clif
index 71fa6977ed18..f1c5f0d959e5 100644
--- a/cranelift/filetests/filetests/isa/aarch64/fcvt.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/fcvt.clif
@@ -139,20 +139,21 @@ block0(v0: i64):
 
 function %f9(i32x4) -> f64x2 {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:
 ; block0:
-;   sxtl v2.2d, v0.2s
-;   scvtf v0.2d, v2.2d
+;   sxtl v3.2d, v0.2s
+;   scvtf v0.2d, v3.2d
 ;   ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   sshll v2.2d, v0.2s, #0
-;   scvtf v0.2d, v2.2d
+;   sshll v3.2d, v0.2s, #0
+;   scvtf v0.2d, v3.2d
 ;   ret
 
 function %f10(i8, i16, i32, i64) -> f32 {
diff --git a/cranelift/filetests/filetests/isa/s390x/vec-fp.clif b/cranelift/filetests/filetests/isa/s390x/vec-fp.clif
index 2cfbf7044a98..4ccb89adf7ba 100644
--- a/cranelift/filetests/filetests/isa/s390x/vec-fp.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-fp.clif
@@ -865,38 +865,40 @@ block0(v0: i64x2):
 
 function %fcvt_low_from_sint_i32x4_f64x2_be(i32x4) -> f64x2 {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:
 ; block0:
-;   vuphf %v2, %v24
-;   vcdgb %v24, %v2, 0, 4
+;   vuphf %v3, %v24
+;   vcdgb %v24, %v3, 0, 4
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   vuphf %v2, %v24
-;   vcdgb %v24, %v2, 0, 4
+;   vuphf %v3, %v24
+;   vcdgb %v24, %v3, 0, 4
 ;   br %r14
 
 function %fcvt_low_from_sint_i32x4_f64x2_le(i32x4) -> f64x2 wasmtime_system_v {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:
 ; block0:
-;   vuplf %v2, %v24
-;   vcdgb %v24, %v2, 0, 4
+;   vuplf %v3, %v24
+;   vcdgb %v24, %v3, 0, 4
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   vuplf %v2, %v24
-;   vcdgb %v24, %v2, 0, 4
+;   vuplf %v3, %v24
+;   vcdgb %v24, %v3, 0, 4
 ;   br %r14
 
 function %fcvt_to_uint_sat_f32x4_i32x4(f32x4) -> i32x4 {
diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif
index 0b54b48d2441..cbe440745cf4 100644
--- a/cranelift/filetests/filetests/isa/x64/fcvt.clif
+++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif
@@ -211,8 +211,9 @@ block0(v0: i64):
 
 function %f9(i32x4) -> f64x2 {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:
diff --git a/cranelift/filetests/filetests/isa/x64/float-avx.clif b/cranelift/filetests/filetests/isa/x64/float-avx.clif
index 57a70682cdc4..95babfe3c651 100644
--- a/cranelift/filetests/filetests/isa/x64/float-avx.clif
+++ b/cranelift/filetests/filetests/isa/x64/float-avx.clif
@@ -404,8 +404,9 @@ block0(v0: f64x2):
 
 function %fcvt_low_from_sint(i32x4) -> f64x2 {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:
diff --git a/cranelift/filetests/filetests/runtests/simd-conversion.clif b/cranelift/filetests/filetests/runtests/simd-conversion.clif
index cda819c62dc2..261c41b0859d 100644
--- a/cranelift/filetests/filetests/runtests/simd-conversion.clif
+++ b/cranelift/filetests/filetests/runtests/simd-conversion.clif
@@ -46,8 +46,9 @@ block0(v0:f32x4):
 
 function %fcvt_low_from_sint(i32x4) -> f64x2 {
 block0(v0: i32x4):
-    v1 = fcvt_low_from_sint.f64x2 v0
-    return v1
+    v1 = swiden_low v0
+    v2 = fcvt_from_sint.f64x2 v1
+    return v2
 }
 ; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
 ; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]
diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs
index 504ee906da66..1e5626ded220 100644
--- a/cranelift/fuzzgen/src/function_generator.rs
+++ b/cranelift/fuzzgen/src/function_generator.rs
@@ -1387,41 +1387,6 @@ static OPCODE_SIGNATURES: Lazy<Vec<OpcodeSignature>> = Lazy::new(|| {
                 (Opcode::FcvtFromSint, &[I8X16], &[F64X2]),
                 (Opcode::FcvtFromSint, &[I16X8], &[F64X2]),
                 (Opcode::FcvtFromSint, &[I32X4], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I8], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I16], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I32], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I64], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I128], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I8X16], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I16X8], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I32X4], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I64X2], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I8], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I16], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I32], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I64], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I128], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I8X16], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I16X8], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I32X4], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I64X2], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I8], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I16], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I32], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I64], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I128], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I8X16], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I16X8], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I32X4], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I64X2], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I8], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I16], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I32], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I64], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I128], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I8X16], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I16X8], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I64X2], &[F64X2]),
             )
         })
         .collect()
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index d642219d2983..8c8e19c263b2 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -1157,29 +1157,6 @@ where
                 ctrl_ty,
             )?)
         }
-        Opcode::FcvtLowFromSint => {
-            let in_ty = inst_context.type_of(inst_context.args()[0]).unwrap();
-            let x = extractlanes(&arg(0), in_ty)?;
-
-            assign(vectorizelanes(
-                &(x[..(ctrl_ty.lane_count() as usize)]
-                    .into_iter()
-                    .map(|x| {
-                        DataValue::float(
-                            match ctrl_ty.lane_type() {
-                                types::F32 => {
-                                    (x.to_owned().into_int_signed()? as f32).to_bits() as u64
-                                }
-                                types::F64 => (x.to_owned().into_int_signed()? as f64).to_bits(),
-                                _ => unimplemented!("unexpected promotion to {:?}", ctrl_ty),
-                            },
-                            ctrl_ty.lane_type(),
-                        )
-                    })
-                    .collect::<ValueResult<SimdVec<DataValue>>>()?),
-                ctrl_ty,
-            )?)
-        }
         Opcode::FvpromoteLow => {
             let in_ty = inst_context.type_of(inst_context.args()[0]).unwrap();
             assert_eq!(in_ty, types::F32X4);
diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
index fc85a8c77c57..103afeadd884 100644
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1908,7 +1908,8 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
         }
         Operator::F64x2ConvertLowI32x4S => {
             let a = pop1_with_bitcast(state, I32X4, builder);
-            state.push1(builder.ins().fcvt_low_from_sint(F64X2, a));
+            let widened_a = builder.ins().swiden_low(a);
+            state.push1(builder.ins().fcvt_from_sint(F64X2, widened_a));
         }
         Operator::F64x2ConvertLowI32x4U => {
             let a = pop1_with_bitcast(state, I32X4, builder);