Skip to content

Commit

Permalink
8346235: RISC-V: Optimize bitwise AND with mask values
Browse files Browse the repository at this point in the history
Reviewed-by: gcao, rehn, fjiang
  • Loading branch information
RealFYang committed Dec 17, 2024
1 parent 929d4a5 commit a7631cc
Show file tree
Hide file tree
Showing 11 changed files with 120 additions and 120 deletions.
8 changes: 4 additions & 4 deletions src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right,
if (Assembler::is_simm12(c - 1)) {
__ andi(t1, t1, c - 1);
} else {
__ zero_extend(t1, t1, shift);
__ zext(t1, t1, shift);
}
__ subw(dreg, t1, t0);
}
Expand All @@ -78,7 +78,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right,
if (Assembler::is_simm12(c - 1)) {
__ andi(t0, t0, c - 1);
} else {
__ zero_extend(t0, t0, shift);
__ zext(t0, t0, shift);
}
__ addw(dreg, t0, lreg);
__ sraiw(dreg, dreg, shift);
Expand Down Expand Up @@ -205,7 +205,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig
if (Assembler::is_simm12(c - 1)) {
__ andi(t0, t0, c - 1);
} else {
__ zero_extend(t0, t0, shift);
__ zext(t0, t0, shift);
}
__ add(dreg, t0, lreg_lo);
__ srai(dreg, dreg, shift);
Expand All @@ -224,7 +224,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig
if (Assembler::is_simm12(c - 1)) {
__ andi(t1, t1, c - 1);
} else {
__ zero_extend(t1, t1, shift);
__ zext(t1, t1, shift);
}
__ sub(dreg, t1, t0);
}
Expand Down
14 changes: 7 additions & 7 deletions src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -952,15 +952,15 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
case Bytecodes::_d2f:
__ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); break;
case Bytecodes::_i2c:
__ zero_extend(dest->as_register(), src->as_register(), 16); break;
__ zext(dest->as_register(), src->as_register(), 16); break;
case Bytecodes::_i2l:
__ sign_extend(dest->as_register_lo(), src->as_register(), 32); break;
__ sext(dest->as_register_lo(), src->as_register(), 32); break;
case Bytecodes::_i2s:
__ sign_extend(dest->as_register(), src->as_register(), 16); break;
__ sext(dest->as_register(), src->as_register(), 16); break;
case Bytecodes::_i2b:
__ sign_extend(dest->as_register(), src->as_register(), 8); break;
__ sext(dest->as_register(), src->as_register(), 8); break;
case Bytecodes::_l2i:
__ sign_extend(dest->as_register(), src->as_register_lo(), 32); break;
__ sext(dest->as_register(), src->as_register_lo(), 32); break;
case Bytecodes::_d2l:
__ fcvt_l_d_safe(dest->as_register_lo(), src->as_double_reg()); break;
case Bytecodes::_f2i:
Expand Down Expand Up @@ -1288,7 +1288,7 @@ void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
int right_const = right->as_jint();
if (Assembler::is_simm12(right_const)) {
logic_op_imm(Rdst, Rleft, right_const, code);
__ sign_extend(Rdst, Rdst, 32);
__ sext(Rdst, Rdst, 32);
} else {
__ mv(t0, right_const);
logic_op_reg32(Rdst, Rleft, t0, code);
Expand Down Expand Up @@ -1609,7 +1609,7 @@ void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
__ la(res, ExternalAddress(StubRoutines::crc_table_addr()));

__ notr(crc, crc); // ~crc
__ zero_extend(crc, crc, 32);
__ zext(crc, crc, 32);
__ update_byte_crc32(crc, val, res);
__ notr(res, crc); // ~crc
}
Expand Down
10 changes: 5 additions & 5 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,7 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b
slli(ch1, tmp6, XLEN - 16);
srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c
andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d
zext(tmp6, tmp6, 8); // pattern[m-4], 0x0000000d
slli(ch2, ch2, 16);
orr(ch2, ch2, ch1); // 0x00000b0c
slli(result, tmp3, 48); // use result as temp register
Expand Down Expand Up @@ -1540,11 +1540,11 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
srl(tmp1, tmp1, result);
srl(tmp2, tmp2, result);
if (isLL) {
andi(tmp1, tmp1, 0xFF);
andi(tmp2, tmp2, 0xFF);
zext(tmp1, tmp1, 8);
zext(tmp2, tmp2, 8);
} else {
andi(tmp1, tmp1, 0xFFFF);
andi(tmp2, tmp2, 0xFFFF);
zext(tmp1, tmp1, 16);
zext(tmp2, tmp2, 16);
}
sub(result, tmp1, tmp2);
j(DONE);
Expand Down
10 changes: 5 additions & 5 deletions src/hotspot/cpu/riscv/interp_masm_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,20 +70,20 @@ void InterpreterMacroAssembler::narrow(Register result) {
bind(notBool);
mv(t1, T_BYTE);
bne(t0, t1, notByte);
sign_extend(result, result, 8);
sext(result, result, 8);
j(done);

bind(notByte);
mv(t1, T_CHAR);
bne(t0, t1, notChar);
zero_extend(result, result, 16);
zext(result, result, 16);
j(done);

bind(notChar);
sign_extend(result, result, 16);
sext(result, result, 16);

bind(done);
sign_extend(result, result, 32);
sext(result, result, 32);
}

void InterpreterMacroAssembler::jump_to_entry(address entry) {
Expand Down Expand Up @@ -276,7 +276,7 @@ void InterpreterMacroAssembler::push_ptr(Register r) {

void InterpreterMacroAssembler::push_i(Register r) {
addi(esp, esp, -wordSize);
sign_extend(r, r, 32);
sext(r, r, 32);
sd(r, Address(esp, 0));
}

Expand Down
72 changes: 36 additions & 36 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1536,9 +1536,6 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
return count;
}

static const int64_t right_32_bits = right_n_bits(32);
static const int64_t right_8_bits = right_n_bits(8);

/**
* Emits code to update CRC-32 with a byte value according to constants in table
*
Expand All @@ -1555,7 +1552,7 @@ void MacroAssembler::update_byte_crc32(Register crc, Register val, Register tabl
assert_different_registers(crc, val, table);

xorr(val, val, crc);
andi(val, val, right_8_bits);
zext(val, val, 8);
shadd(val, val, table, val, 2);
lwu(val, Address(val));
srli(crc, crc, 8);
Expand Down Expand Up @@ -1585,7 +1582,7 @@ void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp1,
srli(v, v, 32);
xorr(v, v, crc);

andi(tmp1, v, right_8_bits);
zext(tmp1, v, 8);
shadd(tmp1, tmp1, table3, tmp2, 2);
lwu(crc, Address(tmp1));

Expand Down Expand Up @@ -2086,7 +2083,11 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
const int64_t single_table_size = 256;
const int64_t unroll = 16;
const int64_t unroll_words = unroll*wordSize;
mv(tmp5, right_32_bits);

// tmp5 = 0xffffffff
notr(tmp5, zr);
srli(tmp5, tmp5, 32);

andn(crc, tmp5, crc);

const ExternalAddress table_addr = StubRoutines::crc_table_addr();
Expand All @@ -2110,7 +2111,7 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
subw(len, len, 2);
lhu(tmp1, Address(buf));
add(buf, buf, 2);
andi(tmp2, tmp1, right_8_bits);
zext(tmp2, tmp1, 8);
update_byte_crc32(crc, tmp2, table0);
srli(tmp2, tmp1, 8);
update_byte_crc32(crc, tmp2, table0);
Expand Down Expand Up @@ -2634,17 +2635,17 @@ void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register

void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
andr(Rd, Rs1, Rs2);
sign_extend(Rd, Rd, 32);
sext(Rd, Rd, 32);
}

void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
orr(Rd, Rs1, Rs2);
sign_extend(Rd, Rd, 32);
sext(Rd, Rd, 32);
}

void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
xorr(Rd, Rs1, Rs2);
sign_extend(Rd, Rd, 32);
sext(Rd, Rd, 32);
}

// Rd = Rs1 & (~Rs2)
Expand Down Expand Up @@ -2832,18 +2833,18 @@ void MacroAssembler::revbw(Register Rd, Register Rs, Register tmp1, Register tmp
}
assert_different_registers(Rs, tmp1, tmp2);
assert_different_registers(Rd, tmp1, tmp2);
andi(tmp1, Rs, 0xFF);
zext(tmp1, Rs, 8);
slli(tmp1, tmp1, 8);
for (int step = 8; step < 24; step += 8) {
srli(tmp2, Rs, step);
andi(tmp2, tmp2, 0xFF);
zext(tmp2, tmp2, 8);
orr(tmp1, tmp1, tmp2);
slli(tmp1, tmp1, 8);
}
srli(Rd, Rs, 24);
andi(Rd, Rd, 0xFF);
zext(Rd, Rd, 8);
orr(Rd, tmp1, Rd);
sign_extend(Rd, Rd, 32);
sext(Rd, Rd, 32);
}

// reverse bytes in doubleword
Expand All @@ -2855,16 +2856,16 @@ void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2
}
assert_different_registers(Rs, tmp1, tmp2);
assert_different_registers(Rd, tmp1, tmp2);
andi(tmp1, Rs, 0xFF);
zext(tmp1, Rs, 8);
slli(tmp1, tmp1, 8);
for (int step = 8; step < 56; step += 8) {
srli(tmp2, Rs, step);
andi(tmp2, tmp2, 0xFF);
zext(tmp2, tmp2, 8);
orr(tmp1, tmp1, tmp2);
slli(tmp1, tmp1, 8);
}
srli(Rd, Rs, 56);
andi(Rd, Rd, 0xFF);
zext(Rd, Rd, 8);
orr(Rd, tmp1, Rd);
}

Expand Down Expand Up @@ -3237,7 +3238,7 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register

if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 &&
CompressedKlassPointers::shift() == 0) {
zero_extend(dst, src, 32);
zext(dst, src, 32);
return;
}

Expand Down Expand Up @@ -3690,7 +3691,7 @@ void MacroAssembler::load_reserved(Register dst,
break;
case uint32:
lr_w(dst, addr, acquire);
zero_extend(dst, dst, 32);
zext(dst, dst, 32);
break;
default:
ShouldNotReachHere();
Expand Down Expand Up @@ -3731,7 +3732,7 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte
} else {
// size == int16 case
mv(mask, -1);
zero_extend(mask, mask, 16);
zext(mask, mask, 16);
}
sll(mask, mask, shift);

Expand Down Expand Up @@ -3808,10 +3809,10 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
srl(result, scratch0, shift);

if (size == int8) {
sign_extend(result, result, 8);
sext(result, result, 8);
} else {
// size == int16 case
sign_extend(result, result, 16);
sext(result, result, 16);
}
}
}
Expand Down Expand Up @@ -4005,7 +4006,7 @@ ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
#define ATOMIC_XCHGU(OP1, OP2) \
void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \
atomic_##OP2(prev, newv, addr); \
zero_extend(prev, prev, 32); \
zext(prev, prev, 32); \
return; \
}

Expand All @@ -4025,7 +4026,7 @@ void MacroAssembler::atomic_cas(Register prev, Register newv, Register addr,
break;
case uint32:
amocas_w(prev, addr, newv, (Assembler::Aqrl)(acquire | release));
zero_extend(prev, prev, 32);
zext(prev, prev, 32);
break;
default:
ShouldNotReachHere();
Expand Down Expand Up @@ -4614,7 +4615,7 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
relocate(oop_Relocation::spec(oop_index), [&] {
li32(dst, 0xDEADBEEF);
});
zero_extend(dst, dst, 32);
zext(dst, dst, 32);
}

void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
Expand All @@ -4627,7 +4628,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
relocate(metadata_Relocation::spec(index), [&] {
li32(dst, nk);
});
zero_extend(dst, dst, 32);
zext(dst, dst, 32);
}

address MacroAssembler::reloc_call(Address entry, Register tmp) {
Expand Down Expand Up @@ -4902,7 +4903,7 @@ void MacroAssembler::mul_add(Register out, Register in, Register offset,
mv(tmp, out);
mv(out, zr);
blez(len, L_end);
zero_extend(k, k, 32);
zext(k, k, 32);
slliw(t0, offset, LogBytesPerInt);
add(offset, tmp, t0);
slliw(t0, len, LogBytesPerInt);
Expand Down Expand Up @@ -5436,7 +5437,7 @@ void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Regis
orr(Rd, Rd, tmp2);
}
slli(Rd, Rd, wordSize);
andi(tmp2, Rs, 0xFF); // last byte mask at lower word
zext(tmp2, Rs, 8); // last byte mask at lower word
orr(Rd, Rd, tmp2);
}

Expand Down Expand Up @@ -5838,7 +5839,7 @@ void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp
}
}

void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
void MacroAssembler::zext(Register dst, Register src, int bits) {
switch (bits) {
case 32:
if (UseZba) {
Expand All @@ -5853,19 +5854,17 @@ void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
}
break;
case 8:
if (UseZbb) {
zext_b(dst, src);
return;
}
break;
zext_b(dst, src);
return;
default:
break;
}

slli(dst, src, XLEN - bits);
srli(dst, dst, XLEN - bits);
}

void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
void MacroAssembler::sext(Register dst, Register src, int bits) {
switch (bits) {
case 32:
sext_w(dst, src);
Expand All @@ -5885,6 +5884,7 @@ void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
default:
break;
}

slli(dst, src, XLEN - bits);
srai(dst, dst, XLEN - bits);
}
Expand Down Expand Up @@ -5977,7 +5977,7 @@ void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
} else {
if (dst.first() != src.first()) {
sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32);
sext(dst.first()->as_Register(), src.first()->as_Register(), 32);
}
}
}
Expand Down
Loading

1 comment on commit a7631cc

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.