From a26be628bca5a9b1da950bbe6952766182883fe5 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Fri, 18 Jun 2021 00:19:49 +0100 Subject: [PATCH 1/3] aarch64: Implement lowering brz,brnz for i128 values --- .../codegen/src/isa/aarch64/lower_inst.rs | 22 +++++--- .../filetests/isa/aarch64/condbr.clif | 46 +++++++++++++++++ .../filetests/filetests/runtests/i128-br.clif | 51 ++++++++++--------- 3 files changed, 87 insertions(+), 32 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index da4501b81128..db5b021e2f0b 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -3515,6 +3515,7 @@ pub(crate) fn lower_branch>( match op0 { Opcode::Brz | Opcode::Brnz => { + let ty = ctx.input_ty(branches[0], 0); let flag_input = InsnInput { insn: branches[0], input: 0, @@ -3549,14 +3550,19 @@ pub(crate) fn lower_branch>( kind: CondBrKind::Cond(cond), }); } else { - let rt = put_input_in_reg( - ctx, - InsnInput { - insn: branches[0], - input: 0, - }, - NarrowValueMode::ZeroExtend64, - ); + let rt = if ty == I128 { + let tmp = ctx.alloc_tmp(I64).only_reg().unwrap(); + let input = put_input_in_regs(ctx, flag_input); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Orr64, + rd: tmp, + rn: input.regs()[0], + rm: input.regs()[1], + }); + tmp.to_reg() + } else { + put_input_in_reg(ctx, flag_input, NarrowValueMode::ZeroExtend64) + }; let kind = match op0 { Opcode::Brz => CondBrKind::Zero(rt), Opcode::Brnz => CondBrKind::NotZero(rt), diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index bfa4d04f7713..95d86cb4cbc4 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -253,3 +253,49 @@ block1: ; check: movz x0, #1 ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret + + +function %i128_brz(i128){ +block0(v0: i128): + brz 
v0, block1 + jump block1 + +block1: + nop + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: orr x0, x0, x1 +; nextln: cbz x0, label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %i128_brnz(i128){ +block0(v0: i128): + brnz v0, block1 + jump block1 + +block1: + nop + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: orr x0, x0, x1 +; nextln: cbnz x0, label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret diff --git a/cranelift/filetests/filetests/runtests/i128-br.clif b/cranelift/filetests/filetests/runtests/i128-br.clif index e946bcb8d860..5902d41ad9b7 100644 --- a/cranelift/filetests/filetests/runtests/i128-br.clif +++ b/cranelift/filetests/filetests/runtests/i128-br.clif @@ -1,42 +1,45 @@ test run -; target aarch64 TODO: Not yet implemented on aarch64 +target aarch64 ; target s390x TODO: Not yet implemented on s390x target x86_64 machinst target x86_64 legacy -function %br_false() -> b1 { -block0: - v10 = iconst.i64 0x42 - v11 = iconst.i64 0x00 - v0 = iconcat v10, v11 - brz v0, block2 +function %i128_br(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = iconcat v0, v1 + brz v2, block2 jump block1 block1: - v1 = bconst.b1 true - return v1 + v3 = bconst.b1 true + return v3 block2: - v2 = bconst.b1 false - return v2 + v4 = bconst.b1 false + return v4 } -; run - -function %br_true() -> b1 { -block0: - v10 = iconst.i64 0x00 - v11 = iconst.i64 0x00 - v0 = iconcat v10, v11 - brz v0, block2 +; run: %i128_br(0, 0) == false +; run: %i128_br(-1, 0) == true +; run: %i128_br(0, -1) == true +; run: %i128_br(-1, -1) == true + + +function %i128_brnz(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = iconcat v0, v1 + brnz v2, block2 jump block1 block1: - v1 = bconst.b1 false - return v1 + v3 = 
bconst.b1 true + return v3 block2: - v2 = bconst.b1 true - return v2 + v4 = bconst.b1 false + return v4 } -; run +; run: %i128_brnz(0, 0) == true +; run: %i128_brnz(-1, 0) == false +; run: %i128_brnz(0, -1) == false +; run: %i128_brnz(-1, -1) == false From b5708b4386745ee23e428eb111409a3cdc0be1de Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 17 Jun 2021 22:45:38 +0100 Subject: [PATCH 2/3] aarch64: Deduplicate lowering icmp Lowering icmp was duplicated across callers that only cared about flags, and callers that only cared about the bool result. Merge both callers into `lower_icmp` which does the correct thing depending on a new IcmpOutput parameter. --- cranelift/codegen/src/isa/aarch64/lower.rs | 167 ++++++++++++++++-- .../codegen/src/isa/aarch64/lower_inst.rs | 142 +-------------- 2 files changed, 163 insertions(+), 146 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 3130b03b4cc6..145171c72848 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1154,12 +1154,43 @@ pub(crate) fn maybe_input_insn_via_conv>( None } -pub(crate) fn lower_icmp_or_ifcmp_to_flags>( +/// Specifies what [lower_icmp] should do when lowering +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum IcmpOutput { + /// Only sets flags, discarding the results + Flags, + /// Materializes the results into a register. The flags set may be incorrect + Register(Writable), +} + +impl IcmpOutput { + pub fn reg(&self) -> Option> { + match self { + IcmpOutput::Flags => None, + IcmpOutput::Register(reg) => Some(*reg), + } + } +} + +/// Lower an icmp comparison +/// +/// We can lower into the status flags, or materialize the result into a register. +/// This is controlled by the `output` parameter. 
+pub(crate) fn lower_icmp>( ctx: &mut C, insn: IRInst, - is_signed: bool, -) { - debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn); + condcode: IntCC, + output: IcmpOutput, +) -> CodegenResult<()> { + debug!( + "lower_icmp: insn {}, condcode: {}, output: {:?}", + insn, condcode, output + ); + + let rd = output.reg().unwrap_or(writable_zero_reg()); + let inputs = insn_inputs(ctx, insn); + let cond = lower_condcode(condcode); + let is_signed = condcode_is_signed(condcode); let ty = ctx.input_ty(insn, 0); let bits = ty_bits(ty); let narrow_mode = match (bits <= 32, is_signed) { @@ -1168,14 +1199,126 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags>( (false, true) => NarrowValueMode::SignExtend64, (false, false) => NarrowValueMode::ZeroExtend64, }; - let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; - let ty = ctx.input_ty(insn, 0); - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); - let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); - debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm); - let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); - let rd = writable_zero_reg(); - ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm)); + + if ty == I128 { + let lhs = put_input_in_regs(ctx, inputs[0]); + let rhs = put_input_in_regs(ctx, inputs[1]); + + let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap(); + let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap(); + + match condcode { + IntCC::Equal | IntCC::NotEqual => { + // eor tmp1, lhs_lo, rhs_lo + // eor tmp2, lhs_hi, rhs_hi + // adds xzr, tmp1, tmp2 + // cset dst, {eq, ne} + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Eor64, + rd: tmp1, + rn: lhs.regs()[0], + rm: rhs.regs()[0], + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Eor64, + rd: tmp2, + rn: lhs.regs()[1], + rm: rhs.regs()[1], + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::AddS64, + rd: writable_zero_reg(), + rn: tmp1.to_reg(), + rm: tmp2.to_reg(), + }); + + if let IcmpOutput::Register(rd) = output { + 
materialize_bool_result(ctx, insn, rd, cond); + } + } + IntCC::Overflow | IntCC::NotOverflow => { + // We can do an 128bit add while throwing away the results + // and check the overflow flags at the end. + // + // adds xzr, lhs_lo, rhs_lo + // adcs xzr, lhs_hi, rhs_hi + // cset dst, {vs, vc} + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::AddS64, + rd: writable_zero_reg(), + rn: lhs.regs()[0], + rm: rhs.regs()[0], + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::AdcS64, + rd: writable_zero_reg(), + rn: lhs.regs()[1], + rm: rhs.regs()[1], + }); + + if let IcmpOutput::Register(rd) = output { + materialize_bool_result(ctx, insn, rd, cond); + } + } + _ => { + // The currently generated ASM does not correctly set the flags, so we assert here + // to ensure that we don't silently lower incorrect code. + assert_ne!(IcmpOutput::Flags, output, "Unable to lower icmp to flags"); + + // cmp lhs_lo, rhs_lo + // cset tmp1, low_cc + // cmp lhs_hi, rhs_hi + // cset tmp2, cond + // csel dst, tmp1, tmp2, eq + + let low_cc = match condcode { + IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => Cond::Hs, + IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi, + IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => Cond::Ls, + IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo, + _ => unreachable!(), + }; + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::SubS64, + rd: writable_zero_reg(), + rn: lhs.regs()[0], + rm: rhs.regs()[0], + }); + materialize_bool_result(ctx, insn, tmp1, low_cc); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::SubS64, + rd: writable_zero_reg(), + rn: lhs.regs()[1], + rm: rhs.regs()[1], + }); + materialize_bool_result(ctx, insn, tmp2, cond); + ctx.emit(Inst::CSel { + cond: Cond::Eq, + rd, + rn: tmp1.to_reg(), + rm: tmp2.to_reg(), + }); + } + } + } else if !ty.is_vector() { + let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rm = 
put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); + ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm)); + + if let IcmpOutput::Register(rd) = output { + materialize_bool_result(ctx, insn, rd, cond); + } + } else { + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + lower_vector_compare(ctx, rd, rn, rm, ty, cond)?; + } + + Ok(()) } pub(crate) fn lower_fcmp_or_ffcmp_to_flags>(ctx: &mut C, insn: IRInst) { diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index db5b021e2f0b..c4f732defe3c 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1,7 +1,7 @@ //! Lower a single Cranelift instruction into vcode. use crate::binemit::CodeOffset; -use crate::ir::condcodes::{FloatCC, IntCC}; +use crate::ir::condcodes::FloatCC; use crate::ir::types::*; use crate::ir::Inst as IRInst; use crate::ir::{InstructionData, Opcode, TrapCode}; @@ -1528,8 +1528,7 @@ pub(crate) fn lower_insn_to_regs>( { let condcode = ctx.data(icmp_insn).cond_code().unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); - lower_icmp_or_ifcmp_to_flags(ctx, icmp_insn, is_signed); + lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::Flags)?; cond } else if let Some(fcmp_insn) = maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint) @@ -1577,11 +1576,10 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Selectif | Opcode::SelectifSpectreGuard => { let condcode = ctx.data(insn).cond_code().unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a // single-def ifcmp. 
let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed); + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?; // csel.COND rd, rn, rm let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); @@ -1648,14 +1646,11 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Trueif => { let condcode = ctx.data(insn).cond_code().unwrap(); - let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a // single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - materialize_bool_result(ctx, insn, rd, cond); + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Register(rd))?; } Opcode::Trueff => { @@ -1847,126 +1842,8 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Icmp => { let condcode = ctx.data(insn).cond_code().unwrap(); - let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let ty = ctx.input_ty(insn, 0); - let bits = ty_bits(ty); - let narrow_mode = match (bits <= 32, is_signed) { - (true, true) => NarrowValueMode::SignExtend32, - (true, false) => NarrowValueMode::ZeroExtend32, - (false, true) => NarrowValueMode::SignExtend64, - (false, false) => NarrowValueMode::ZeroExtend64, - }; - - if ty == I128 { - let lhs = put_input_in_regs(ctx, inputs[0]); - let rhs = put_input_in_regs(ctx, inputs[1]); - - let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap(); - let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap(); - - match condcode { - IntCC::Equal | IntCC::NotEqual => { - // eor tmp1, lhs_lo, rhs_lo - // eor tmp2, lhs_hi, rhs_hi - // adds xzr, tmp1, tmp2 - // cset dst, {eq, ne} - - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::Eor64, - rd: tmp1, - rn: lhs.regs()[0], - rm: rhs.regs()[0], 
- }); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::Eor64, - rd: tmp2, - rn: lhs.regs()[1], - rm: rhs.regs()[1], - }); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AddS64, - rd: writable_zero_reg(), - rn: tmp1.to_reg(), - rm: tmp2.to_reg(), - }); - materialize_bool_result(ctx, insn, rd, cond); - } - IntCC::Overflow | IntCC::NotOverflow => { - // We can do an 128bit add while throwing away the results - // and check the overflow flags at the end. - // - // adds xzr, lhs_lo, rhs_lo - // adcs xzr, lhs_hi, rhs_hi - // cset dst, {vs, vc} - - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AddS64, - rd: writable_zero_reg(), - rn: lhs.regs()[0], - rm: rhs.regs()[0], - }); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AdcS64, - rd: writable_zero_reg(), - rn: lhs.regs()[1], - rm: rhs.regs()[1], - }); - materialize_bool_result(ctx, insn, rd, cond); - } - _ => { - // cmp lhs_lo, rhs_lo - // cset tmp1, low_cc - // cmp lhs_hi, rhs_hi - // cset tmp2, cond - // csel dst, tmp1, tmp2, eq - - let low_cc = match condcode { - IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => { - Cond::Hs - } - IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi, - IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => { - Cond::Ls - } - IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo, - _ => unreachable!(), - }; - - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::SubS64, - rd: writable_zero_reg(), - rn: lhs.regs()[0], - rm: rhs.regs()[0], - }); - materialize_bool_result(ctx, insn, tmp1, low_cc); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::SubS64, - rd: writable_zero_reg(), - rn: lhs.regs()[1], - rm: rhs.regs()[1], - }); - materialize_bool_result(ctx, insn, tmp2, cond); - ctx.emit(Inst::CSel { - cond: Cond::Eq, - rd, - rn: tmp1.to_reg(), - rm: tmp2.to_reg(), - }); - } - } - } else if !ty.is_vector() { - let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); - let rm = put_input_in_rse_imm12(ctx, inputs[1], 
narrow_mode); - ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm)); - materialize_bool_result(ctx, insn, rd, cond); - } else { - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); - let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); - lower_vector_compare(ctx, rd, rn, rm, ty, cond)?; - } + lower_icmp(ctx, insn, condcode, IcmpOutput::Register(rd))?; } Opcode::Fcmp => { @@ -2020,11 +1897,10 @@ pub(crate) fn lower_insn_to_regs>( } else if op == Opcode::Trapif { let condcode = ctx.data(insn).cond_code().unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a single-def ifcmp. let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed); + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?; cond } else { let condcode = ctx.data(insn).fp_cond_code().unwrap(); @@ -3525,11 +3401,10 @@ pub(crate) fn lower_branch>( { let condcode = ctx.data(icmp_insn).cond_code().unwrap(); let cond = lower_condcode(condcode); - let is_signed = condcode_is_signed(condcode); let negated = op0 == Opcode::Brz; let cond = if negated { cond.invert() } else { cond }; - lower_icmp_or_ifcmp_to_flags(ctx, icmp_insn, is_signed); + lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::Flags)?; ctx.emit(Inst::CondBr { taken, not_taken, @@ -3621,13 +3496,12 @@ pub(crate) fn lower_branch>( let cond = lower_condcode(condcode); let kind = CondBrKind::Cond(cond); - let is_signed = condcode_is_signed(condcode); let flag_input = InsnInput { insn: branches[0], input: 0, }; if let Some(ifcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ifcmp) { - lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed); + lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::Flags)?; ctx.emit(Inst::CondBr { taken, not_taken, From 45faace329135d18cf08ddea819444f41a071260 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 19 Jun 2021 22:01:33 
+0100 Subject: [PATCH 3/3] aarch64: Implement i128 br_icmp The previous commit deduplicated the icmp impl, so we reuse that but make modifications where we don't need to set the results. --- cranelift/codegen/src/isa/aarch64/lower.rs | 49 ++- .../codegen/src/isa/aarch64/lower_inst.rs | 30 +- .../filetests/isa/aarch64/condbr.clif | 308 +++++++++++++++++ .../filetests/filetests/runtests/i128-br.clif | 26 +- .../filetests/runtests/i128-bricmp.clif | 321 ++++++++++++++++++ 5 files changed, 679 insertions(+), 55 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/i128-bricmp.clif diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 145171c72848..a8a445030178 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1263,23 +1263,14 @@ pub(crate) fn lower_icmp>( } } _ => { - // The currently generated ASM does not correctly set the flags, so we assert here - // to ensure that we don't silently lower incorrect code. 
- assert_ne!(IcmpOutput::Flags, output, "Unable to lower icmp to flags"); - // cmp lhs_lo, rhs_lo - // cset tmp1, low_cc + // cset tmp1, unsigned_cond // cmp lhs_hi, rhs_hi // cset tmp2, cond // csel dst, tmp1, tmp2, eq - let low_cc = match condcode { - IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => Cond::Hs, - IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi, - IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => Cond::Ls, - IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo, - _ => unreachable!(), - }; + let rd = output.reg().unwrap_or(tmp1); + let unsigned_cond = lower_condcode(condcode.unsigned()); ctx.emit(Inst::AluRRR { alu_op: ALUOp::SubS64, @@ -1287,7 +1278,7 @@ pub(crate) fn lower_icmp>( rn: lhs.regs()[0], rm: rhs.regs()[0], }); - materialize_bool_result(ctx, insn, tmp1, low_cc); + materialize_bool_result(ctx, insn, tmp1, unsigned_cond); ctx.emit(Inst::AluRRR { alu_op: ALUOp::SubS64, rd: writable_zero_reg(), @@ -1301,6 +1292,38 @@ pub(crate) fn lower_icmp>( rn: tmp1.to_reg(), rm: tmp2.to_reg(), }); + + if output == IcmpOutput::Flags { + // We only need to guarantee that the flags for `cond` are correct, so we can + // compare rd with 0 or 1 + + // For the `*OrEqual` condition codes we compare against 1 instead of zero + if condcode.without_equal() != condcode { + lower_constant_u64(ctx, tmp2, 1); + } + + let xzr = zero_reg(); + let rd = rd.to_reg(); + let tmp2 = tmp2.to_reg(); + let (rn, rm) = match condcode { + IntCC::SignedGreaterThanOrEqual => (rd, tmp2), + IntCC::UnsignedGreaterThanOrEqual => (rd, tmp2), + IntCC::SignedLessThanOrEqual => (tmp2, rd), + IntCC::UnsignedLessThanOrEqual => (tmp2, rd), + IntCC::SignedGreaterThan => (rd, xzr), + IntCC::UnsignedGreaterThan => (rd, xzr), + IntCC::SignedLessThan => (xzr, rd), + IntCC::UnsignedLessThan => (xzr, rd), + _ => unreachable!(), + }; + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::SubS64, + rd: writable_zero_reg(), + rn, + rm, + }); + } } } 
} else if !ty.is_vector() { diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index c4f732defe3c..8b130af9b112 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -3455,35 +3455,7 @@ pub(crate) fn lower_branch>( let cond = lower_condcode(condcode); let kind = CondBrKind::Cond(cond); - let is_signed = condcode_is_signed(condcode); - let ty = ctx.input_ty(branches[0], 0); - let bits = ty_bits(ty); - let narrow_mode = match (bits <= 32, is_signed) { - (true, true) => NarrowValueMode::SignExtend32, - (true, false) => NarrowValueMode::ZeroExtend32, - (false, true) => NarrowValueMode::SignExtend64, - (false, false) => NarrowValueMode::ZeroExtend64, - }; - let rn = put_input_in_reg( - ctx, - InsnInput { - insn: branches[0], - input: 0, - }, - narrow_mode, - ); - let rm = put_input_in_rse_imm12( - ctx, - InsnInput { - insn: branches[0], - input: 1, - }, - narrow_mode, - ); - - let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); - let rd = writable_zero_reg(); - ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm)); + lower_icmp(ctx, branches[0], condcode, IcmpOutput::Flags)?; ctx.emit(Inst::CondBr { taken, not_taken, diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index 95d86cb4cbc4..19fd4fb33e38 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -299,3 +299,311 @@ block1: ; check: Block 3: ; check: ldp fp, lr, [sp], #16 ; nextln: ret + + + +function %i128_bricmp_eq(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp eq v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! 
+; nextln: mov fp, sp +; nextln: eor x0, x0, x2 +; nextln: eor x1, x1, x3 +; nextln: adds xzr, x0, x1 +; nextln: b.eq label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %i128_bricmp_ne(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp ne v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: eor x0, x0, x2 +; nextln: eor x1, x1, x3 +; nextln: adds xzr, x0, x1 +; nextln: b.ne label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %i128_bricmp_slt(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp slt v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs xzr, x0, x2 +; nextln: cset x0, lo +; nextln: subs xzr, x1, x3 +; nextln: cset x1, lt +; nextln: csel x0, x0, x1, eq +; nextln: subs xzr, xzr, x0 +; nextln: b.lt label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %i128_bricmp_ult(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp ult v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs xzr, x0, x2 +; nextln: cset x0, lo +; nextln: subs xzr, x1, x3 +; nextln: cset x1, lo +; nextln: csel x0, x0, x1, eq +; nextln: subs xzr, xzr, x0 +; nextln: b.lo label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_bricmp_sle(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp sle v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! 
+; nextln: mov fp, sp +; nextln: subs xzr, x0, x2 +; nextln: cset x0, ls +; nextln: subs xzr, x1, x3 +; nextln: cset x1, le +; nextln: csel x0, x0, x1, eq +; nextln: movz x1, #1 +; nextln: subs xzr, x1, x0 +; nextln: b.le label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_bricmp_ule(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp ule v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs xzr, x0, x2 +; nextln: cset x0, ls +; nextln: subs xzr, x1, x3 +; nextln: cset x1, ls +; nextln: csel x0, x0, x1, eq +; nextln: movz x1, #1 +; nextln: subs xzr, x1, x0 +; nextln: b.ls label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_bricmp_sgt(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp sgt v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs xzr, x0, x2 +; nextln: cset x0, hi +; nextln: subs xzr, x1, x3 +; nextln: cset x1, gt +; nextln: csel x0, x0, x1, eq +; nextln: subs xzr, x0, xzr +; nextln: b.gt label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_bricmp_ugt(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp ugt v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! 
+; nextln: mov fp, sp +; nextln: subs xzr, x0, x2 +; nextln: cset x0, hi +; nextln: subs xzr, x1, x3 +; nextln: cset x1, hi +; nextln: csel x0, x0, x1, eq +; nextln: subs xzr, x0, xzr +; nextln: b.hi label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %i128_bricmp_sge(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp sge v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs xzr, x0, x2 +; nextln: cset x0, hs +; nextln: subs xzr, x1, x3 +; nextln: cset x1, ge +; nextln: csel x0, x0, x1, eq +; nextln: movz x1, #1 +; nextln: subs xzr, x0, x1 +; nextln: b.ge label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_bricmp_uge(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp uge v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs xzr, x0, x2 +; nextln: cset x0, hs +; nextln: subs xzr, x1, x3 +; nextln: cset x1, hs +; nextln: csel x0, x0, x1, eq +; nextln: movz x1, #1 +; nextln: subs xzr, x0, x1 +; nextln: b.hs label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_bricmp_of(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp of v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! 
+; nextln: mov fp, sp +; nextln: adds xzr, x0, x2 +; nextln: adcs xzr, x1, x3 +; nextln: b.vs label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_bricmp_nof(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp nof v0, v1, block1 + jump block1 + +block1: + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: adds xzr, x0, x2 +; nextln: adcs xzr, x1, x3 +; nextln: b.vc label1 ; b label2 +; check: Block 1: +; check: b label3 +; check: Block 2: +; check: b label3 +; check: Block 3: +; check: ldp fp, lr, [sp], #16 +; nextln: ret diff --git a/cranelift/filetests/filetests/runtests/i128-br.clif b/cranelift/filetests/filetests/runtests/i128-br.clif index 5902d41ad9b7..74b4f6fa8fa0 100644 --- a/cranelift/filetests/filetests/runtests/i128-br.clif +++ b/cranelift/filetests/filetests/runtests/i128-br.clif @@ -5,24 +5,24 @@ target x86_64 machinst target x86_64 legacy -function %i128_br(i64, i64) -> b1 { +function %i128_brz(i64, i64) -> b1 { block0(v0: i64, v1: i64): v2 = iconcat v0, v1 brz v2, block2 jump block1 block1: - v3 = bconst.b1 true + v3 = bconst.b1 false return v3 block2: - v4 = bconst.b1 false + v4 = bconst.b1 true return v4 } -; run: %i128_br(0, 0) == false -; run: %i128_br(-1, 0) == true -; run: %i128_br(0, -1) == true -; run: %i128_br(-1, -1) == true +; run: %i128_brz(0, 0) == true +; run: %i128_brz(-1, 0) == false +; run: %i128_brz(0, -1) == false +; run: %i128_brz(-1, -1) == false function %i128_brnz(i64, i64) -> b1 { @@ -32,14 +32,14 @@ block0(v0: i64, v1: i64): jump block1 block1: - v3 = bconst.b1 true + v3 = bconst.b1 false return v3 block2: - v4 = bconst.b1 false + v4 = bconst.b1 true return v4 } -; run: %i128_brnz(0, 0) == true -; run: %i128_brnz(-1, 0) == false -; run: %i128_brnz(0, -1) == false -; run: %i128_brnz(-1, -1) == false +; run: %i128_brnz(0, 0) == false +; run: %i128_brnz(-1, 0) == true +; run: 
%i128_brnz(0, -1) == true +; run: %i128_brnz(-1, -1) == true diff --git a/cranelift/filetests/filetests/runtests/i128-bricmp.clif b/cranelift/filetests/filetests/runtests/i128-bricmp.clif new file mode 100644 index 000000000000..0f5f96536bf9 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bricmp.clif @@ -0,0 +1,321 @@ +test run +target aarch64 + +function %i128_bricmp_eq(i64, i64, i64, i64) -> b1 { +block0(v0: i64, v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 eq v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_eq(0, 0, 0, 0) == true +; run: %i128_bricmp_eq(-1, -1, -1, -1) == true +; run: %i128_bricmp_eq(-1, -1, 0, 0) == false +; run: %i128_bricmp_eq(-1, -1, 0, -1) == false +; run: %i128_bricmp_eq(-1, 0, -1, -1) == false +; run: %i128_bricmp_eq(0, -1, -1, -1) == false +; run: %i128_bricmp_eq(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == false +; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == false + +function %i128_bricmp_ne(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 ne v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_ne(0, 0, 0, 0) == false +; run: %i128_bricmp_ne(-1, -1, -1, -1) == false +; run: %i128_bricmp_ne(-1, -1, 0, 0) == true +; run: %i128_bricmp_ne(-1, -1, 0, -1) == true +; run: %i128_bricmp_ne(-1, 0, -1, -1) == true +; run: %i128_bricmp_ne(0, -1, -1, -1) == true +; run: %i128_bricmp_ne(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == false +; run: 
%i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == true +; run: %i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == true + + +function %i128_bricmp_slt(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 slt v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_slt(0, 0, 0, 0) == false +; run: %i128_bricmp_slt(1, 0, 1, 0) == false +; run: %i128_bricmp_slt(0, 0, 1, 0) == true +; run: %i128_bricmp_slt(-1, -1, 0, 0) == true +; run: %i128_bricmp_slt(0, 0, -1, -1) == false +; run: %i128_bricmp_slt(-1, -1, -1, -1) == false +; run: %i128_bricmp_slt(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_slt(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true +; run: %i128_bricmp_slt(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false + +function %i128_bricmp_ult(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 ult v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_ult(0, 0, 0, 0) == false +; run: %i128_bricmp_ult(1, 0, 1, 0) == false +; run: %i128_bricmp_ult(0, 0, 1, 0) == true +; run: %i128_bricmp_ult(-1, -1, 0, 0) == false +; run: %i128_bricmp_ult(0, 0, -1, -1) == true +; run: %i128_bricmp_ult(-1, -1, -1, -1) == false +; run: %i128_bricmp_ult(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_ult(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true +; run: %i128_bricmp_ult(0x00000000_00000000, 
0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false + +function %i128_bricmp_sle(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 sle v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_sle(0, 0, 0, 0) == true +; run: %i128_bricmp_sle(1, 0, 1, 0) == true +; run: %i128_bricmp_sle(0, 0, 1, 0) == true +; run: %i128_bricmp_sle(-1, -1, 0, 0) == true +; run: %i128_bricmp_sle(0, 0, -1, -1) == false +; run: %i128_bricmp_sle(-1, -1, -1, -1) == true +; run: %i128_bricmp_sle(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_sle(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true +; run: %i128_bricmp_sle(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false + +function %i128_bricmp_ule(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 ule v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_ule(0, 0, 0, 0) == true +; run: %i128_bricmp_ule(1, 0, 1, 0) == true +; run: %i128_bricmp_ule(0, 0, 1, 0) == true +; run: %i128_bricmp_ule(-1, -1, 0, 0) == false +; run: %i128_bricmp_ule(0, 0, -1, -1) == true +; run: %i128_bricmp_ule(-1, -1, -1, -1) == true +; run: %i128_bricmp_ule(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_ule(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true +; run: %i128_bricmp_ule(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false + +function %i128_bricmp_sgt(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: 
i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 sgt v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_sgt(0, 0, 0, 0) == false +; run: %i128_bricmp_sgt(1, 0, 1, 0) == false +; run: %i128_bricmp_sgt(0, 0, 1, 0) == false +; run: %i128_bricmp_sgt(-1, -1, 0, 0) == false +; run: %i128_bricmp_sgt(0, 0, -1, -1) == true +; run: %i128_bricmp_sgt(-1, -1, -1, -1) == false +; run: %i128_bricmp_sgt(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_sgt(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false +; run: %i128_bricmp_sgt(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true + +function %i128_bricmp_ugt(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 ugt v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_ugt(0, 0, 0, 0) == false +; run: %i128_bricmp_ugt(1, 0, 1, 0) == false +; run: %i128_bricmp_ugt(0, 0, 1, 0) == false +; run: %i128_bricmp_ugt(-1, -1, 0, 0) == true +; run: %i128_bricmp_ugt(0, 0, -1, -1) == false +; run: %i128_bricmp_ugt(-1, -1, -1, -1) == false +; run: %i128_bricmp_ugt(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_ugt(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false +; run: %i128_bricmp_ugt(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true + +function %i128_bricmp_sge(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 sge v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 
+ +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_sge(0, 0, 0, 0) == true +; run: %i128_bricmp_sge(1, 0, 1, 0) == true +; run: %i128_bricmp_sge(0, 0, 1, 0) == false +; run: %i128_bricmp_sge(-1, -1, 0, 0) == false +; run: %i128_bricmp_sge(0, 0, -1, -1) == true +; run: %i128_bricmp_sge(-1, -1, -1, -1) == true +; run: %i128_bricmp_sge(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_sge(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false +; run: %i128_bricmp_sge(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true + +function %i128_bricmp_uge(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 uge v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_uge(0, 0, 0, 0) == true +; run: %i128_bricmp_uge(1, 0, 1, 0) == true +; run: %i128_bricmp_uge(0, 0, 1, 0) == false +; run: %i128_bricmp_uge(-1, -1, 0, 0) == true +; run: %i128_bricmp_uge(0, 0, -1, -1) == false +; run: %i128_bricmp_uge(-1, -1, -1, -1) == true +; run: %i128_bricmp_uge(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_uge(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false +; run: %i128_bricmp_uge(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true + +function %i128_bricmp_of(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 of v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_of(0, 0, 0, 0) == false +; run: %i128_bricmp_of(0, 0, 1, 0) == false +; run: 
%i128_bricmp_of(0, 0, -1, -1) == false +; run: %i128_bricmp_of(-1, -1, -1, -1) == false +; run: %i128_bricmp_of(0x00000000_00000000, 0x80000000_00000000, 0, 0) == false +; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == false +; run: %i128_bricmp_of(1, 0, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == true +; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x80000000_00000000) == false +; run: %i128_bricmp_of(0x00000000_00000000, 0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x30000000_00000000) == false +; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x30000000_00000000) == true + +function %i128_bricmp_nof(i64, i64, i64, i64) -> b1 { +block0(v0: i64,v1: i64,v2: i64,v3: i64): + v4 = iconcat v0, v1 + v5 = iconcat v2, v3 + + br_icmp.i128 nof v4, v5, block2 + jump block1 + +block1: + v6 = bconst.b1 false + return v6 + +block2: + v7 = bconst.b1 true + return v7 +} +; run: %i128_bricmp_nof(0, 0, 0, 0) == true +; run: %i128_bricmp_nof(0, 0, 1, 0) == true +; run: %i128_bricmp_nof(0, 0, -1, -1) == true +; run: %i128_bricmp_nof(-1, -1, -1, -1) == true +; run: %i128_bricmp_nof(0x00000000_00000000, 0x80000000_00000000, 0, 0) == true +; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == true +; run: %i128_bricmp_nof(1, 0, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == false +; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x80000000_00000000) == true +; run: %i128_bricmp_nof(0x00000000_00000000, 0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000000, 
0x30000000_00000000) == true +; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x30000000_00000000) == false