Skip to content

Commit

Permalink
Merge raw_bitcast and bitcast (#5175)
Browse files Browse the repository at this point in the history
- Allow bitcast for vectors with differing lane widths
- Remove raw_bitcast IR instruction
- Change all users of raw_bitcast to bitcast
- Implement support for no-op bitcast cases across backends

This implements the second step of the plan outlined here:
#4566 (comment)
  • Loading branch information
uweigand authored Nov 2, 2022
1 parent e0c8a7f commit 961107e
Show file tree
Hide file tree
Showing 26 changed files with 97 additions and 132 deletions.
28 changes: 0 additions & 28 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -683,8 +683,6 @@ pub(crate) fn define(
.build(),
);

let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string());

let Mem = &TypeVar::new(
"Mem",
"Any type that can be stored in memory",
Expand Down Expand Up @@ -3148,32 +3146,6 @@ pub(crate) fn define(
The input and output types must be storable to memory and of the same
size. A bitcast is equivalent to storing one type and loading the other
type from the same address.
For vector types, the lane types must also be the same size (see
`raw_bitcast` for changing the lane size).
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

let x = &Operand::new("x", Any);
let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted");

ig.push(
Inst::new(
"raw_bitcast",
r#"
Cast the bits in `x` as a different type of the same bit width.
This instruction does not change the data's representation but allows
data in registers to be used as different types, e.g. an i32x4 as a
b8x16. The only constraint on the result `a` is that it can be
`raw_bitcast` back to the original type. Also, in a raw_bitcast between
vector types with the same number of lanes, the value of each result
lane is a raw_bitcast of the corresponding operand lane. TODO there is
currently no mechanism for enforcing the bit width constraint.
"#,
&formats.unary,
)
Expand Down
9 changes: 2 additions & 7 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2212,8 +2212,8 @@
;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; SIMD&FP <=> SIMD&FP
(rule 5 (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _)))))
(fpu_move out_ty x))
(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type (ty_float_or_vec _)))))
x)

; GPR => SIMD&FP
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
Expand All @@ -2232,11 +2232,6 @@
x)
(rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)

;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (raw_bitcast val))
val)

;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; extractlane with lane 0 can pass through the value unchanged; upper
Expand Down
2 changes: 0 additions & 2 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ pub(crate) fn lower_insn_to_regs(

Opcode::Vconst => implemented_in_isle(ctx),

Opcode::RawBitcast => implemented_in_isle(ctx),

Opcode::Extractlane => implemented_in_isle(ctx),

Opcode::Insertlane => implemented_in_isle(ctx),
Expand Down
5 changes: 0 additions & 5 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -814,11 +814,6 @@
(lower (has_type out (bitcast v @ (value_type in_ty))))
(gen_moves v in_ty out))

;;;;; Rules for `raw_bitcast`;;;;;;;;;
(rule
(lower (has_type out (raw_bitcast v @ (value_type in_ty))))
(gen_moves v in_ty out))

;;;;; Rules for `ceil`;;;;;;;;;
(rule
(lower (has_type ty (ceil x)))
Expand Down
21 changes: 15 additions & 6 deletions cranelift/codegen/src/isa/s390x/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1760,16 +1760,25 @@
(rule (lower (has_type $I32 (bitcast x @ (value_type $F32))))
(vec_extract_lane $F32X4 x 0 (zero_reg)))


;;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; FIXME: There are two flavors of raw_bitcast, which are currently not
;; Bitcast between types residing in GPRs is a no-op.
(rule 1 (lower (has_type (gpr32_ty _)
(bitcast x @ (value_type (gpr32_ty _))))) x)
(rule 2 (lower (has_type (gpr64_ty _)
(bitcast x @ (value_type (gpr64_ty _))))) x)

;; Bitcast between types residing in FPRs is a no-op.
(rule 3 (lower (has_type (ty_scalar_float _)
(bitcast x @ (value_type (ty_scalar_float _))))) x)

;; Bitcast between types residing in VRs is a no-op.
;; FIXME: There are two flavors of vector bitcast, which are currently not
;; distinguished in CLIF IR. Those generated by Wasmtime assume little-endian
;; lane order, and those generated elsewhere assume big-endian lane order.
;; Raw bitcast is a no-op if current lane order matches that assumed lane order.
;; Bitcast is a no-op if the current lane order matches the lane order assumed by its generator.
;; However, due to our choice of lane order depending on the current function
;; ABI, every bitcast we currently see here is indeed a no-op.
(rule (lower (raw_bitcast x)) x)
(rule 4 (lower (has_type (vr128_ty _)
(bitcast x @ (value_type (vr128_ty _))))) x)


;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/s390x/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ impl LowerBackend for S390xBackend {
| Opcode::ScalarToVector
| Opcode::VhighBits
| Opcode::Bitcast
| Opcode::RawBitcast
| Opcode::Load
| Opcode::Uload8
| Opcode::Sload8
Expand Down
17 changes: 8 additions & 9 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3303,6 +3303,14 @@
(rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
(bitcast_gpr_to_xmm $I64 src))

;; Bitcast between types residing in GPR registers is a no-op.
(rule 1 (lower (has_type (is_gpr_type _)
(bitcast x @ (value_type (is_gpr_type _))))) x)

;; Bitcast between types residing in XMM registers is a no-op.
(rule 2 (lower (has_type (is_xmm_type _)
(bitcast x @ (value_type (is_xmm_type _))))) x)

;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b)))
Expand Down Expand Up @@ -3472,15 +3480,6 @@
;; TODO use Inst::gen_constant() instead.
(x64_xmm_load_const ty (const_to_vconst const)))

;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; A raw_bitcast is just a mechanism for correcting the type of V128 values (see
;; https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
;; instruction should emit no machine code but a move is necessary to give the
;; register allocator a definition for the output virtual register.
(rule (lower (raw_bitcast val))
(put_in_regs val))

;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
Expand Down
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,6 @@ fn lower_insn_to_regs(
| Opcode::GetPinnedReg
| Opcode::SetPinnedReg
| Opcode::Vconst
| Opcode::RawBitcast
| Opcode::Insertlane
| Opcode::Shuffle
| Opcode::Swizzle
Expand Down
8 changes: 4 additions & 4 deletions cranelift/codegen/src/nan_canonicalization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
.select(is_nan, canon_nan, new_res);
};
let vector_select = |pos: &mut FuncCursor, canon_nan: Value| {
let cond = pos.ins().raw_bitcast(types::I8X16, is_nan);
let canon_nan = pos.ins().raw_bitcast(types::I8X16, canon_nan);
let result = pos.ins().raw_bitcast(types::I8X16, new_res);
let cond = pos.ins().bitcast(types::I8X16, is_nan);
let canon_nan = pos.ins().bitcast(types::I8X16, canon_nan);
let result = pos.ins().bitcast(types::I8X16, new_res);
let bitmask = pos.ins().bitselect(cond, canon_nan, result);
pos.ins().with_result(val).raw_bitcast(val_type, bitmask);
pos.ins().with_result(val).bitcast(val_type, bitmask);
};

match val_type {
Expand Down
8 changes: 4 additions & 4 deletions cranelift/codegen/src/simple_preopt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@ mod simplify {
return;
}
let new_type = I8.by(old_cond_type.bytes()).unwrap();
(pos.ins().raw_bitcast(new_type, args[0]), new_type)
(pos.ins().bitcast(new_type, args[0]), new_type)
}
_ => return,
};
Expand All @@ -874,10 +874,10 @@ mod simplify {

if arg_type != old_arg_type {
// Operands types must match, we need to add bitcasts.
let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
let arg1 = pos.ins().bitcast(arg_type, args[1]);
let arg2 = pos.ins().bitcast(arg_type, args[2]);
let ret = pos.ins().vselect(cond_val, arg1, arg2);
pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
pos.func.dfg.replace(inst).bitcast(old_arg_type, ret);
} else {
pos.func
.dfg
Expand Down
12 changes: 1 addition & 11 deletions cranelift/codegen/src/verifier/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1078,17 +1078,7 @@ impl<'a> Verifier<'a> {
let typ = self.func.dfg.ctrl_typevar(inst);
let value_type = self.func.dfg.value_type(arg);

if typ.lane_bits() != value_type.lane_bits() {
errors.fatal((
inst,
format!(
"The bitcast argument {} has a lane type of {} bits, which doesn't match an expected type of {} bits",
arg,
value_type.lane_bits(),
typ.lane_bits()
),
))
} else if typ.bits() != value_type.bits() {
if typ.bits() != value_type.bits() {
errors.fatal((
inst,
format!(
Expand Down
6 changes: 3 additions & 3 deletions cranelift/filetests/filetests/isa/x64/move-elision.clif
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ block0(v0: i32x4):
;; In the x64 backend, all of these pseudo-instructions are lowered to moves between registers (e.g. MOVAPD, MOVDQA,
;; etc.). Because these have been marked as moves, no instructions are emitted by this function besides the prologue
;; and epilogue.
v1 = raw_bitcast.f32x4 v0
v2 = raw_bitcast.f64x2 v1
v3 = raw_bitcast.i8x16 v2
v1 = bitcast.f32x4 v0
v2 = bitcast.f64x2 v1
v3 = bitcast.i8x16 v2
return v3
}

Expand Down
2 changes: 1 addition & 1 deletion cranelift/filetests/filetests/isa/x64/simd-issue-3951.clif
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ function %check_issue_3951(i64 vmctx) -> i8x16 fast {
v4 = global_value.i64 gv0
v5 = load.i8x16 notrap aligned v4+8
v6 = icmp ugt v3, v5
v7 = raw_bitcast.i8x16 v6
v7 = bitcast.i8x16 v6
jump block1(v7)
block1(v1: i8x16):
return v1
Expand Down
5 changes: 3 additions & 2 deletions cranelift/filetests/filetests/runtests/bitcast-ref64.clif
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
test run
target aarch64
; the interpreter, x86_64, and s390x do not support bitcasting to/from
; references
target x86_64
target s390x
; the interpreter does not support bitcasting to/from references

function %bitcast_ir64(i64) -> i8 {
block0(v0: i64):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
; x86_64 and s390x do not support bitcasting to the same type as the input.
target x86_64
target s390x

function %bitcast_i8(i8) -> i8 {
block0(v0: i8):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ block0:

function %is_null_r64(i64) -> i8 {
block0(v0: i64):
v1 = raw_bitcast.r64 v0
v1 = bitcast.r64 v0
v2 = is_null v1
return v2
}
Expand All @@ -24,7 +24,7 @@ block0(v0: i64):

function %is_invalid_r64(i64) -> i8 {
block0(v0: i64):
v1 = raw_bitcast.r64 v0
v1 = bitcast.r64 v0
v2 = is_invalid v1
return v2
}
Expand Down
21 changes: 21 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-bitcast-aarch64.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
test interpret
test run
target aarch64
;; 64-bit vector types only supported on aarch64

function %bitcast_if32x2(i32x2) -> f32x2 {
block0(v0: i32x2):
v1 = bitcast.f32x2 v0
return v1
}
; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]

function %bitcast_fi32x2(f32x2) -> i32x2 {
block0(v0: f32x2):
v1 = bitcast.i32x2 v0
return v1
}
; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]

19 changes: 2 additions & 17 deletions cranelift/filetests/filetests/runtests/simd-bitcast.clif
Original file line number Diff line number Diff line change
@@ -1,23 +1,8 @@
test interpret
test run
target aarch64
; x86_64 and s390x do not support vector bitcasts.

function %bitcast_if32x2(i32x2) -> f32x2 {
block0(v0: i32x2):
v1 = bitcast.f32x2 v0
return v1
}
; run: %bitcast_if32x2([0 4294967295]) == [0x0.0 -NaN:0x3fffff]
; run: %bitcast_if32x2([-1 127]) == [-NaN:0x3fffff 0x0.0000fep-126]

function %bitcast_fi32x2(f32x2) -> i32x2 {
block0(v0: f32x2):
v1 = bitcast.i32x2 v0
return v1
}
; run: %bitcast_fi32x2([0x0.0 -NaN:0x3fffff]) == [0 4294967295]
; run: %bitcast_fi32x2([-NaN:0x3fffff 0x0.0000fep-126]) == [-1 127]
target x86_64
target s390x

function %bitcast_if32x4(i32x4) -> f32x4 {
block0(v0: i32x4):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ target x86_64 skylake
function %mask_from_icmp(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp sge v0, v1
v3 = raw_bitcast.i32x4 v2
v3 = bitcast.i32x4 v2
v4 = bitselect v3, v0, v1
return v4
}
; run: %mask_from_icmp([5 6 7 8], [1 10 20 7]) == [5 10 20 8]

function %mask_casted(i64x2, i64x2, i32x4) -> i64x2 {
block0(v0: i64x2, v1: i64x2, v2: i32x4):
v3 = raw_bitcast.i64x2 v2
v3 = bitcast.i64x2 v2
v4 = bitselect v3, v0, v1
return v4
}
Expand Down
6 changes: 3 additions & 3 deletions cranelift/filetests/filetests/runtests/simd-comparison.clif
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ block0:
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v2 = icmp sgt v0, v1
v3 = raw_bitcast.i8x16 v2
v3 = bitcast.i8x16 v2
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v7 = icmp eq v3, v4
v8 = vall_true v7
Expand Down Expand Up @@ -126,7 +126,7 @@ block0:
v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
v2 = icmp ult v0, v1
v3 = vconst.i16x8 0x00
v4 = raw_bitcast.i16x8 v2
v4 = bitcast.i16x8 v2
v5 = icmp eq v3, v4
v8 = vall_true v5
return v8
Expand Down Expand Up @@ -200,7 +200,7 @@ block0:
v2 = fcmp gt v0, v1
; now check that the result v2 is all zeroes
v3 = vconst.i32x4 0x00
v4 = raw_bitcast.i32x4 v2
v4 = bitcast.i32x4 v2
v5 = icmp eq v3, v4
v8 = vall_true v5
return v8
Expand Down
Loading

0 comments on commit 961107e

Please sign in to comment.