Skip to content

Commit

Permalink
8345110: RISC-V: Optimize and clean up byte reverse assembler rou…
Browse files Browse the repository at this point in the history
…tine
RealFYang committed Nov 27, 2024
1 parent 461ffaf commit f737241
Showing 4 changed files with 45 additions and 99 deletions.
93 changes: 15 additions & 78 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
@@ -2320,41 +2320,6 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
}
}


// Byte-swap the halfword held in the low 16 bits of Rs and sign-extend it:
//   Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
// tmp is only touched on the non-Zbb path, where it must differ from Rs and Rd.
void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
  if (!UseZbb) {
    assert_different_registers(Rs, tmp);
    assert_different_registers(Rd, tmp);
    // tmp = Rs[15:8], brought down to bits 7:0
    srli(tmp, Rs, 8);
    andi(tmp, tmp, 0xFF);
    // Rd = Rs[7:0] placed in bits 15:8, with its top bit replicated upwards
    slli(Rd, Rs, 56);
    srai(Rd, Rd, 48); // sign-extend
    orr(Rd, Rd, tmp);
  } else {
    // Full 64-bit byte reverse, then an arithmetic shift to bring the
    // swapped halfword back down to bits 15:0 with sign extension.
    rev8(Rd, Rs);
    srai(Rd, Rd, 48);
  }
}

// Byte-swap the word held in the low 32 bits of Rs and sign-extend it:
//   Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
// tmp1 and tmp2 are only touched on the non-Zbb path, where they must differ
// from Rs and Rd.
void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
  if (!UseZbb) {
    assert_different_registers(Rs, tmp1, tmp2);
    assert_different_registers(Rd, tmp1, tmp2);
    // Step 1: swap the bytes inside each of the two low halfwords.
    revb_h_w_u(Rd, Rs, tmp1, tmp2);
    // Step 2: exchange the two halfwords. The low one moves up to bits 31:16
    // and carries the sign; the high one drops to bits 15:0.
    slli(tmp2, Rd, 48);
    srai(tmp2, tmp2, 32); // sign-extend
    srli(Rd, Rd, 16);
    orr(Rd, Rd, tmp2);
  } else {
    // Full 64-bit byte reverse, then an arithmetic shift to bring the
    // swapped word back down to bits 31:0 with sign extension.
    rev8(Rd, Rs);
    srai(Rd, Rd, 32);
  }
}

// reverse bytes in halfword in lower 16 bits and zero-extend
// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
@@ -2391,56 +2356,28 @@ void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Registe
orr(Rd, Rd, tmp2);
}

// This method is only used for revb_h.
// It swaps the two bytes of the top halfword of Rs and moves that halfword to
// the bottom of the register, shifting the remaining 48 bits up by 16:
// Rd = Rs[47:0] Rs[55:48] Rs[63:56]
// tmp1 and tmp2 are clobbered. Rd may be the same register as Rs (revb_h
// calls it that way); Rd is only required to differ from tmp1.
void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
assert_different_registers(Rs, tmp1, tmp2);
assert_different_registers(Rd, tmp1);
// tmp1 = Rs[63:48], i.e. the top halfword in bits 15:0
srli(tmp1, Rs, 48);
// tmp2 = Rs[55:48], moved up to bits 15:8
andi(tmp2, tmp1, 0xFF);
slli(tmp2, tmp2, 8);
// tmp1 = Rs[63:56] in bits 7:0, then merged with tmp2 to form the swapped halfword
srli(tmp1, tmp1, 8);
orr(tmp1, tmp1, tmp2);
// Rs is read for the last time here, so writing Rd is safe even when Rd == Rs
slli(Rd, Rs, 16);
orr(Rd, Rd, tmp1);
}

// Swap the two bytes inside every halfword of Rs, keeping the halfwords in place:
//   Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
// The Zbb path only needs tmp1; the generic path needs both temporaries.
void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
  if (!UseZbb) {
    assert_different_registers(Rs, tmp1, tmp2);
    assert_different_registers(Rd, tmp1, tmp2);
    // Each helper round byte-swaps the current top halfword and moves it to
    // the bottom, so four rounds process every halfword and put them back in
    // their original positions. Only the first round reads Rs.
    Register src = Rs;
    for (int round = 0; round < 4; ++round) {
      revb_h_helper(Rd, src, tmp1, tmp2);
      src = Rd;
    }
    return;
  }

  assert_different_registers(Rs, tmp1);
  assert_different_registers(Rd, tmp1);
  // Reverse all eight bytes, then rotate each 32-bit half by 16 and swap the
  // halves to undo the halfword reordering.
  rev8(Rd, Rs);
  // tmp1 = low word of the reversed value, halfwords exchanged, moved to bits 63:32
  zero_extend(tmp1, Rd, 32);
  roriw(tmp1, tmp1, 16);
  slli(tmp1, tmp1, 32);
  // Rd = high word of the reversed value, halfwords exchanged, kept in bits 31:0
  srli(Rd, Rd, 32);
  roriw(Rd, Rd, 16);
  zero_extend(Rd, Rd, 32);
  orr(Rd, Rd, tmp1);
}

// reverse bytes in lower word, sign-extend
// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
// Bits 63:32 of Rs are ignored. Rd may be the same register as Rs.
// tmp1 and tmp2 are only used (and clobbered) on the non-Zbb path, where they
// must differ from both Rs and Rd.
void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
  if (UseZbb) {
    // rev8 leaves the byte-reversed low word in bits 63:32; one arithmetic
    // shift brings it down to bits 31:0 and sign-extends it. No rotate may be
    // issued in between: it would move the reversed UPPER word of Rs into
    // that position instead.
    rev8(Rd, Rs);
    srai(Rd, Rd, 32);
    return;
  }
  assert_different_registers(Rs, tmp1, tmp2);
  assert_different_registers(Rd, tmp1, tmp2);
  // Accumulate Rs[7:0], Rs[15:8], Rs[23:16] in tmp1, shifting left by one byte
  // after each so that they end up in bits 31:8 in reversed order.
  andi(tmp1, Rs, 0xFF);
  slli(tmp1, tmp1, 8);
  for (int step = 8; step < 24; step += 8) {
    srli(tmp2, Rs, step);
    andi(tmp2, tmp2, 0xFF);
    orr(tmp1, tmp1, tmp2);
    slli(tmp1, tmp1, 8);
  }
  // Rs[31:24] becomes the lowest byte. This is the first write to Rd, after
  // every read of Rs, which is what makes Rd == Rs safe.
  srli(Rd, Rs, 24);
  andi(Rd, Rd, 0xFF);
  orr(Rd, tmp1, Rd);
  sign_extend(Rd, Rd, 32);
}

// reverse bytes in doubleword
6 changes: 1 addition & 5 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
@@ -902,13 +902,9 @@ class MacroAssembler: public Assembler {
void orn(Register Rd, Register Rs1, Register Rs2);

// revb
void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend
void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend
void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend
void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend
void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower
void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword
void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word
void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in lower word, sign-extend
void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword

void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
24 changes: 18 additions & 6 deletions src/hotspot/cpu/riscv/riscv_b.ad
Original file line number Diff line number Diff line change
@@ -181,11 +181,15 @@ instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{
match(Set dst (ReverseBytesI src));

ins_cost(ALU_COST * 2);
format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %}
format %{
"rev8 $dst, $src\t#@bytes_reverse_int_b\t\n"
"srai $dst, $dst, 32\t\n"
%}

ins_encode %{
assert(UseZbb, "must be");
__ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg));
__ rev8(as_Register($dst$$reg), as_Register($src$$reg));
__ srai(as_Register($dst$$reg), as_Register($dst$$reg), 32);
%}

ins_pipe(ialu_reg);
@@ -209,11 +213,15 @@ instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{
match(Set dst (ReverseBytesUS src));

ins_cost(ALU_COST * 2);
format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %}
format %{
"rev8 $dst, $src\t#@bytes_reverse_unsigned_short_b\t\n"
"srli $dst, $dst, 48\t\n"
%}

ins_encode %{
assert(UseZbb, "must be");
__ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg));
__ rev8(as_Register($dst$$reg), as_Register($src$$reg));
__ srli(as_Register($dst$$reg), as_Register($dst$$reg), 48);
%}

ins_pipe(ialu_reg);
@@ -223,11 +231,15 @@ instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{
match(Set dst (ReverseBytesS src));

ins_cost(ALU_COST * 2);
format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %}
format %{
"rev8 $dst, $src\t#@bytes_reverse_short_b\t\n"
"srai $dst, $dst, 48\t\n"
%}

ins_encode %{
assert(UseZbb, "must be");
__ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg));
__ rev8(as_Register($dst$$reg), as_Register($src$$reg));
__ srai(as_Register($dst$$reg), as_Register($dst$$reg), 48);
%}

ins_pipe(ialu_reg);
21 changes: 11 additions & 10 deletions src/hotspot/cpu/riscv/templateTable_riscv.cpp
Original file line number Diff line number Diff line change
@@ -1621,13 +1621,14 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {

// load branch displacement
if (!is_wide) {
// sign extend the 16 bit value in x12
__ lb(x12, at_bcp(1));
__ lbu(t1, at_bcp(2));
__ slli(x12, x12, 8);
__ add(x12, x12, t1);
} else {
__ lwu(x12, at_bcp(1));
__ revb_w_w(x12, x12); // reverse bytes in word and sign-extend
__ revb_w(x12, x12);
}

// Handle all the JSR stuff here, then exit.
@@ -1892,8 +1893,8 @@ void TemplateTable::tableswitch() {
// load lo & hi
__ lwu(x12, Address(x11, BytesPerInt));
__ lwu(x13, Address(x11, 2 * BytesPerInt));
__ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend
__ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
__ revb_w(x12, x12);
__ revb_w(x13, x13);
// check against lo & hi
__ blt(x10, x12, default_case);
__ bgt(x10, x13, default_case);
@@ -1904,7 +1905,7 @@ void TemplateTable::tableswitch() {
__ profile_switch_case(x10, x11, x12);
// continue execution
__ bind(continue_execution);
__ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
__ revb_w(x13, x13);
__ add(xbcp, xbcp, x13);
__ load_unsigned_byte(t0, Address(xbcp));
__ dispatch_only(vtos, /*generate_poll*/true);
@@ -1924,7 +1925,7 @@ void TemplateTable::fast_linearswitch() {
transition(itos, vtos);
Label loop_entry, loop, found, continue_execution;
// bswap x10 so we can avoid bswapping the table entries
__ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend
__ revb_w(x10, x10);
// align xbcp
__ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of
// this instruction (change offsets
@@ -1953,7 +1954,7 @@ void TemplateTable::fast_linearswitch() {
__ profile_switch_case(x11, x10, x9);
// continue execution
__ bind(continue_execution);
__ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
__ revb_w(x13, x13);
__ add(xbcp, xbcp, x13);
__ lbu(t0, Address(xbcp, 0));
__ dispatch_only(vtos, /*generate_poll*/true);
@@ -2024,7 +2025,7 @@ void TemplateTable::fast_binaryswitch() {
// Convert array[h].match to native byte-ordering before compare
__ shadd(temp, h, array, temp, 3);
__ lwu(temp, Address(temp, 0));
__ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
__ revb_w(temp, temp);

Label L_done, L_greater;
__ bge(key, temp, L_greater);
@@ -2047,14 +2048,14 @@ void TemplateTable::fast_binaryswitch() {
// Convert array[i].match to native byte-ordering before compare
__ shadd(temp, i, array, temp, 3);
__ lwu(temp, Address(temp, 0));
__ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
__ revb_w(temp, temp);
__ bne(key, temp, default_case);

// entry found -> j = offset
__ shadd(temp, i, array, temp, 3);
__ lwu(j, Address(temp, BytesPerInt));
__ profile_switch_case(i, key, array);
__ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
__ revb_w(j, j);

__ add(temp, xbcp, j);
__ load_unsigned_byte(t0, Address(temp, 0));
@@ -2067,7 +2068,7 @@ void TemplateTable::fast_binaryswitch() {
__ bind(default_case);
__ profile_switch_default(i);
__ lwu(j, Address(array, -2 * BytesPerInt));
__ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
__ revb_w(j, j);

__ add(temp, xbcp, j);
__ load_unsigned_byte(t0, Address(temp, 0));

0 comments on commit f737241

Please sign in to comment.