cranelift: Remove the fcvt_low_from_sint instruction (bytecodeallia…

…nce#6565) * cranelift: Remove the `fcvt_low_from_sint` instruction This commit removes this instruction since it's a combination of `swiden_low` plus `fcvt_from_sint`. This was used by the WebAssembly `f64x2.convert_low_i32x4_s` instruction previously but the corresponding unsigned variant of the instruction, `f64x2.convert_low_i32x4_u`, used a `uwiden_low` plus `fcvt_from_uint` combo. To help simplify Cranelift's instruction set and to make these two instructions mirrors of each other the Cranelift instruction is removed. The s390x and AArch64 backend lowering rules for this instruction could simply be deleted as the previous combination of the `swiden_low` and `fcvt_from_sint` lowering rules produces the same code. The x64 backend moved its lowering to a special case of the `fcvt_from_sint` lowering. * Fix cranelift-fuzzgen build
salewski · Jun 12, 2023 · a9611a8 · a9611a8
1 parent a986ce9
commit a9611a8
Show file tree

Hide file tree

Showing 12 changed files with 33 additions and 121 deletions.
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -3650,26 +3650,6 @@ pub(crate) fn define(
         .operands_out(vec![Operand::new("a", FloatTo)]),
     );
 
-    ig.push(
-        Inst::new(
-            "fcvt_low_from_sint",
-            r#"
-        Converts packed signed 32-bit integers to packed double precision floating point.
-
-        Considering only the low half of the register, each lane in `x` is interpreted as a
-        signed 32-bit integer that is then converted to a double precision float. This
-        instruction differs from fcvt_from_sint in that it converts half the number of lanes
-        which are converted to occupy twice the number of bits. No rounding should be needed
-        for the resulting float.
-
-        The result type will have half the number of vector lanes as the input.
-        "#,
-            &formats.unary,
-        )
-        .operands_in(vec![Operand::new("x", Int)])
-        .operands_out(vec![Operand::new("a", FloatTo)]),
-    );
-
     let WideInt = &TypeVar::new(
         "WideInt",
         "An integer type of width `i16` upwards",

diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -2831,13 +2831,6 @@
 (rule (lower (has_type (tls_model (TlsModel.Macho)) (tls_value (symbol_value_data name _ _))))
       (macho_tls_get_addr name))
 
-;;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type $F64X2 (fcvt_low_from_sint val)))
-      (let ((extended Reg (vec_extend (VecExtendOp.Sxtl) val $false (ScalarSize.Size64)))
-            (converted Reg (vec_misc (VecMisc2.Scvtf) extended (VectorSize.Size64x2))))
-        converted))
-
 ;;; Rules for `fvpromote_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (fvpromote_low val))

diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -1584,14 +1584,6 @@
       (fcvt_from_sint_reg $F64X2 (FpuRoundMode.ToNearestTiesToEven) x))
 
 
-;;;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Convert the low half of a $I32X4 to a $F64X2.
-(rule (lower (has_type $F64X2 (fcvt_low_from_sint x @ (value_type $I32X4))))
-      (fcvt_from_sint_reg $F64X2 (FpuRoundMode.ToNearestTiesToEven)
-                          (vec_unpacks_low_lane_order $I32X4 x)))
-
-
 ;;;; Rules for `fcvt_to_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Convert a scalar floating-point value in a register to an unsigned integer.

diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
@@ -3253,9 +3253,7 @@
 (rule 0 (lower (fcvt_from_sint a @ (value_type $I32X4)))
       (x64_cvtdq2ps a))
 
-;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (fcvt_low_from_sint a))
+(rule 1 (lower (has_type $F64X2 (fcvt_from_sint (swiden_low a @ (value_type $I32X4)))))
       (x64_cvtdq2pd a))
 
 ;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt.clif b/cranelift/filetests/filetests/isa/aarch64/fcvt.clif
@@ -139,20 +139,21 @@ block0(v0: i64):
 
 function %f9(i32x4) -> f64x2 {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:
 ; block0:
-;   sxtl v2.2d, v0.2s
-;   scvtf v0.2d, v2.2d
+;   sxtl v3.2d, v0.2s
+;   scvtf v0.2d, v3.2d
 ;   ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   sshll v2.2d, v0.2s, #0
-;   scvtf v0.2d, v2.2d
+;   sshll v3.2d, v0.2s, #0
+;   scvtf v0.2d, v3.2d
 ;   ret
 
 function %f10(i8, i16, i32, i64) -> f32 {

diff --git a/cranelift/filetests/filetests/isa/s390x/vec-fp.clif b/cranelift/filetests/filetests/isa/s390x/vec-fp.clif
@@ -865,38 +865,40 @@ block0(v0: i64x2):
 
 function %fcvt_low_from_sint_i32x4_f64x2_be(i32x4) -> f64x2 {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:
 ; block0:
-;   vuphf %v2, %v24
-;   vcdgb %v24, %v2, 0, 4
+;   vuphf %v3, %v24
+;   vcdgb %v24, %v3, 0, 4
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   vuphf %v2, %v24
-;   vcdgb %v24, %v2, 0, 4
+;   vuphf %v3, %v24
+;   vcdgb %v24, %v3, 0, 4
 ;   br %r14
 
 function %fcvt_low_from_sint_i32x4_f64x2_le(i32x4) -> f64x2 wasmtime_system_v {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:
 ; block0:
-;   vuplf %v2, %v24
-;   vcdgb %v24, %v2, 0, 4
+;   vuplf %v3, %v24
+;   vcdgb %v24, %v3, 0, 4
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   vuplf %v2, %v24
-;   vcdgb %v24, %v2, 0, 4
+;   vuplf %v3, %v24
+;   vcdgb %v24, %v3, 0, 4
 ;   br %r14
 
 function %fcvt_to_uint_sat_f32x4_i32x4(f32x4) -> i32x4 {

diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif
@@ -211,8 +211,9 @@ block0(v0: i64):
 
 function %f9(i32x4) -> f64x2 {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:

diff --git a/cranelift/filetests/filetests/isa/x64/float-avx.clif b/cranelift/filetests/filetests/isa/x64/float-avx.clif
@@ -404,8 +404,9 @@ block0(v0: f64x2):
 
 function %fcvt_low_from_sint(i32x4) -> f64x2 {
 block0(v0: i32x4):
-  v1 = fcvt_low_from_sint.f64x2 v0
-  return v1
+  v1 = swiden_low v0
+  v2 = fcvt_from_sint.f64x2 v1
+  return v2
 }
 
 ; VCode:

diff --git a/cranelift/filetests/filetests/runtests/simd-conversion.clif b/cranelift/filetests/filetests/runtests/simd-conversion.clif
@@ -46,8 +46,9 @@ block0(v0:f32x4):
 
 function %fcvt_low_from_sint(i32x4) -> f64x2 {
 block0(v0: i32x4):
-    v1 = fcvt_low_from_sint.f64x2 v0
-    return v1
+    v1 = swiden_low v0
+    v2 = fcvt_from_sint.f64x2 v1
+    return v2
 }
 ; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
 ; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]

diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs
@@ -1387,41 +1387,6 @@ static OPCODE_SIGNATURES: Lazy<Vec<OpcodeSignature>> = Lazy::new(|| {
                 (Opcode::FcvtFromSint, &[I8X16], &[F64X2]),
                 (Opcode::FcvtFromSint, &[I16X8], &[F64X2]),
                 (Opcode::FcvtFromSint, &[I32X4], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I8], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I16], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I32], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I64], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I128], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I8X16], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I16X8], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I32X4], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I64X2], &[F32]),
-                (Opcode::FcvtLowFromSint, &[I8], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I16], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I32], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I64], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I128], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I8X16], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I16X8], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I32X4], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I64X2], &[F64]),
-                (Opcode::FcvtLowFromSint, &[I8], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I16], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I32], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I64], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I128], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I8X16], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I16X8], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I32X4], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I64X2], &[F32X4]),
-                (Opcode::FcvtLowFromSint, &[I8], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I16], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I32], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I64], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I128], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I8X16], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I16X8], &[F64X2]),
-                (Opcode::FcvtLowFromSint, &[I64X2], &[F64X2]),
             )
         })
         .collect()

diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
@@ -1157,29 +1157,6 @@ where
                 ctrl_ty,
             )?)
         }
-        Opcode::FcvtLowFromSint => {
-            let in_ty = inst_context.type_of(inst_context.args()[0]).unwrap();
-            let x = extractlanes(&arg(0), in_ty)?;
-
-            assign(vectorizelanes(
-                &(x[..(ctrl_ty.lane_count() as usize)]
-                    .into_iter()
-                    .map(|x| {
-                        DataValue::float(
-                            match ctrl_ty.lane_type() {
-                                types::F32 => {
-                                    (x.to_owned().into_int_signed()? as f32).to_bits() as u64
-                                }
-                                types::F64 => (x.to_owned().into_int_signed()? as f64).to_bits(),
-                                _ => unimplemented!("unexpected promotion to {:?}", ctrl_ty),
-                            },
-                            ctrl_ty.lane_type(),
-                        )
-                    })
-                    .collect::<ValueResult<SimdVec<DataValue>>>()?),
-                ctrl_ty,
-            )?)
-        }
         Opcode::FvpromoteLow => {
             let in_ty = inst_context.type_of(inst_context.args()[0]).unwrap();
             assert_eq!(in_ty, types::F32X4);

diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
@@ -1908,7 +1908,8 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
         }
         Operator::F64x2ConvertLowI32x4S => {
             let a = pop1_with_bitcast(state, I32X4, builder);
-            state.push1(builder.ins().fcvt_low_from_sint(F64X2, a));
+            let widened_a = builder.ins().swiden_low(a);
+            state.push1(builder.ins().fcvt_from_sint(F64X2, widened_a));
         }
         Operator::F64x2ConvertLowI32x4U => {
             let a = pop1_with_bitcast(state, I32X4, builder);