Skip to content

Commit

Permalink
Initial Float16 support for JDK-24 mainline
Browse files Browse the repository at this point in the history
  • Loading branch information
jatin-bhateja committed Oct 14, 2024
1 parent 41ee582 commit c5536c5
Show file tree
Hide file tree
Showing 40 changed files with 3,788 additions and 72 deletions.
232 changes: 232 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3446,6 +3446,22 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
emit_int16(0x6F, (0xC0 | encode));
}

// VMOVW xmm, r32: moves a 16-bit word from a general-purpose register into an XMM register.
// Encoded as EVEX.128 with the 66 prefix in opcode map 5, opcode 0x6E; the nds field is encoded as 0.
void Assembler::vmovw(XMMRegister dst, Register src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                          VEX_SIMD_66, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x6E, (0xC0 | modrm));
}

// VMOVW r32, xmm: moves the low 16-bit word of an XMM register into a general-purpose register.
// Encoded as EVEX.128 with the 66 prefix in opcode map 5, opcode 0x7E. For this store-style
// form the XMM source goes in the ModRM.reg position and the GPR destination in ModRM.rm,
// which is why src is passed first to vex_prefix_and_encode.
void Assembler::vmovw(Register dst, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(),
                                          VEX_SIMD_66, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x7E, (0xC0 | modrm));
}

void Assembler::vmovdqu(XMMRegister dst, Address src) {
assert(UseAVX > 0, "");
InstructionMark im(this);
Expand Down Expand Up @@ -8353,6 +8369,222 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector
emit_operand(dst, src, 0);
}

// VADDPH (register form): packed half-precision add, dst = nds + src.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x58.
void Assembler::evaddph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x58, (0xC0 | modrm));
}

// VADDPH (memory form): packed half-precision add, dst = nds + [src].
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x58.
void Assembler::evaddph(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Address attributes must be set before the prefix is generated.
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  emit_int8(0x58);
  emit_operand(dst, src, 0);
}

// VSUBPH (register form): packed half-precision subtract, dst = nds - src.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x5C.
void Assembler::evsubph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x5C, (0xC0 | modrm));
}

// VSUBPH (memory form): packed half-precision subtract, dst = nds - [src].
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x5C.
void Assembler::evsubph(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Address attributes must be set before the prefix is generated.
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  emit_int8(0x5C);
  emit_operand(dst, src, 0);
}

// VMULPH (register form): packed half-precision multiply, dst = nds * src.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x59.
void Assembler::evmulph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x59, (0xC0 | modrm));
}

// VMULPH (memory form): packed half-precision multiply, dst = nds * [src].
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x59.
void Assembler::evmulph(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Address attributes must be set before the prefix is generated.
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  emit_int8(0x59);
  emit_operand(dst, src, 0);
}

// VMINPH (register form): packed half-precision minimum of nds and src, written to dst.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x5D.
void Assembler::evminph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x5D, (0xC0 | modrm));
}

// VMINPH (memory form): packed half-precision minimum of nds and [src], written to dst.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x5D.
void Assembler::evminph(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Address attributes must be set before the prefix is generated.
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  emit_int8(0x5D);
  emit_operand(dst, src, 0);
}

// VMAXPH (register form): packed half-precision maximum of nds and src, written to dst.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x5F.
void Assembler::evmaxph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x5F, (0xC0 | modrm));
}

// VMAXPH (memory form): packed half-precision maximum of nds and [src], written to dst.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x5F.
void Assembler::evmaxph(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Address attributes must be set before the prefix is generated.
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  emit_int8(0x5F);
  emit_operand(dst, src, 0);
}

// VDIVPH (register form): packed half-precision divide, dst = nds / src.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x5E.
void Assembler::evdivph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x5E, (0xC0 | modrm));
}

// VDIVPH (memory form): packed half-precision divide, dst = nds / [src].
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x5E.
void Assembler::evdivph(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Address attributes must be set before the prefix is generated.
  attrs.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  emit_int8(0x5E);
  emit_operand(dst, src, 0);
}

// VSQRTPH (register form): packed half-precision square root of src1, written to dst.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x51. The instruction has a
// single source, so the nds field is encoded as 0.
void Assembler::evsqrtph(XMMRegister dst, XMMRegister src1, int vector_len) {
  // Same diagnostic text as the other FP16 emitters, so a failed check names the missing feature.
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x51, (0xC0 | encode));
}

// VSQRTPH (memory form): packed half-precision square root of [src], written to dst.
// Encoded as EVEX with no SIMD prefix in opcode map 5, opcode 0x51. The instruction has a
// single source, so the nds field is encoded as 0.
void Assembler::evsqrtph(XMMRegister dst, Address src, int vector_len) {
  // Same diagnostic text as the other FP16 emitters, so a failed check names the missing feature.
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  // NOTE(review): the packed add/sub/mul/div/min/max memory forms in this file pass EVEX_FVM
  // here while this one passes EVEX_FV - confirm the difference is intentional.
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src, 0);
}

// VFMADD132PH (register form): packed half-precision fused multiply-add using the "132"
// operand ordering, dst = dst * src + nds.
// Encoded as EVEX with the 66 prefix in opcode map 6, opcode 0x98.
void Assembler::evfmadd132ph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  // Same diagnostic text as the other FP16 emitters, so a failed check names the missing feature.
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP6, &attributes);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x98, (0xC0 | encode));
}

// VFMADD132PH (memory form): packed half-precision fused multiply-add using the "132"
// operand ordering, dst = dst * [src] + nds.
// Encoded as EVEX with the 66 prefix in opcode map 6, opcode 0x98.
void Assembler::evfmadd132ph(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  // Same diagnostic text as the other FP16 emitters, so a failed check names the missing feature.
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  // NOTE(review): the packed add/sub/mul/div/min/max memory forms in this file pass EVEX_FVM
  // here while this one passes EVEX_FV - confirm the difference is intentional.
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP6, &attributes);
  emit_int8(0x98);
  emit_operand(dst, src, 0);
}

// VADDSH: scalar half-precision add, dst = nds + src (register operands only).
// Encoded as EVEX with the F3 prefix in opcode map 5, opcode 0x58; the vector length is fixed at 128-bit.
void Assembler::eaddsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x58, (0xC0 | modrm));
}

// VSUBSH: scalar half-precision subtract, dst = nds - src (register operands only).
// Encoded as EVEX with the F3 prefix in opcode map 5, opcode 0x5C; the vector length is fixed at 128-bit.
void Assembler::esubsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x5C, (0xC0 | modrm));
}

// VDIVSH: scalar half-precision divide, dst = nds / src (register operands only).
// Encoded as EVEX with the F3 prefix in opcode map 5, opcode 0x5E; the vector length is fixed at 128-bit.
void Assembler::edivsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x5E, (0xC0 | modrm));
}

// VMULSH: scalar half-precision multiply, dst = nds * src (register operands only).
// Encoded as EVEX with the F3 prefix in opcode map 5, opcode 0x59; the vector length is fixed at 128-bit.
void Assembler::emulsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x59, (0xC0 | modrm));
}

// VMAXSH: scalar half-precision maximum of nds and src, written to dst (register operands only).
// Encoded as EVEX with the F3 prefix in opcode map 5, opcode 0x5F; the vector length is fixed at 128-bit.
void Assembler::emaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x5F, (0xC0 | modrm));
}

// VMINSH: scalar half-precision minimum of nds and src, written to dst (register operands only).
// Encoded as EVEX with the F3 prefix in opcode map 5, opcode 0x5D; the vector length is fixed at 128-bit.
void Assembler::eminsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                          VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x5D, (0xC0 | modrm));
}

// VSQRTSH: scalar half-precision square root of src, written to dst (register operands only).
// Encoded as EVEX with the F3 prefix in opcode map 5, opcode 0x51; the vector length is fixed at 128-bit.
// No separate first-source register is supplied by the caller, so the nds field is encoded as 0.
void Assembler::esqrtsh(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  const int modrm = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                          VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11).
  emit_int16(0x51, (0xC0 | modrm));
}

// VFMADD132SH: scalar half-precision fused multiply-add using the "132" operand ordering,
// dst = dst * src2 + src1 (register operands only).
// Encoded as EVEX with the 66 prefix in opcode map 6, opcode 0x99; the vector length is fixed at 128-bit.
void Assembler::efmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  // Same diagnostic text as the other FP16 emitters, so a failed check names the missing feature.
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP6, &attributes);
  // Opcode byte followed by a register-direct ModRM byte (mod = 0b11). The opcode is passed as a
  // plain literal, matching the packed variant (0x98), instead of going through a C-style cast.
  emit_int16(0x99, (0xC0 | encode));
}

void Assembler::psubb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
Expand Down
32 changes: 32 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,8 @@ class Assembler : public AbstractAssembler {
VEX_OPCODE_0F_38 = 0x2,
VEX_OPCODE_0F_3A = 0x3,
VEX_OPCODE_0F_3C = 0x4,
VEX_OPCODE_MAP5 = 0x5,
VEX_OPCODE_MAP6 = 0x6,
VEX_OPCODE_MASK = 0x1F
};

Expand Down Expand Up @@ -1808,6 +1810,9 @@ class Assembler : public AbstractAssembler {
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);

// Word (16-bit) moves between a general-purpose register and an XMM register.
// Both definitions assert VM_Version::supports_avx512_fp16().
void vmovw(XMMRegister dst, Register src);
void vmovw(Register dst, XMMRegister src);

#ifdef _LP64
void movsbq(Register dst, Address src);
void movsbq(Register dst, Register src);
Expand Down Expand Up @@ -2671,6 +2676,33 @@ class Assembler : public AbstractAssembler {
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// FP16 instructions
// Every routine in this group asserts VM_Version::supports_avx512_fp16() in its definition.
//
// Scalar half-precision forms: register operands only, emitted with a fixed 128-bit
// vector length. esqrtsh takes a single source; efmadd132sh is the fused multiply-add.
void eaddsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void esubsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void emulsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void edivsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void emaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void eminsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void esqrtsh(XMMRegister dst, XMMRegister src);
void efmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2);

// Packed half-precision forms: vector_len is forwarded to the instruction attributes to
// select the vector width. Each operation has a register overload and an Address overload
// that takes its last source operand from memory.
void evaddph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evaddph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evsubph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evsubph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evdivph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evdivph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evmulph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evmulph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evminph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evminph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evmaxph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evmaxph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evfmadd132ph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evfmadd132ph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evsqrtph(XMMRegister dst, XMMRegister src1, int vector_len);
void evsqrtph(XMMRegister dst, Address src1, int vector_len);

// Leaf level assembler routines for masked operations.
void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
Expand Down
36 changes: 36 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6475,3 +6475,39 @@ void C2_MacroAssembler::vector_rearrange_int_float(BasicType bt, XMMRegister dst
vpermps(dst, shuffle, src, vlen_enc);
}
}

// Emits the scalar half-precision instruction that corresponds to the given C2 node opcode.
// Handles Op_AddHF, Op_SubHF, Op_MulHF, Op_DivHF, Op_MaxHF and Op_MinHF; any other opcode
// trips the assert, which prints the node class name.
// NOTE(review): if assert is compiled out in product builds, an unexpected opcode would emit
// nothing - confirm callers only pass the opcodes listed above.
void C2_MacroAssembler::efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  switch (opcode) {
    case Op_AddHF:
      eaddsh(dst, src1, src2);
      break;
    case Op_SubHF:
      esubsh(dst, src1, src2);
      break;
    case Op_MulHF:
      emulsh(dst, src1, src2);
      break;
    case Op_DivHF:
      edivsh(dst, src1, src2);
      break;
    case Op_MaxHF:
      emaxsh(dst, src1, src2);
      break;
    case Op_MinHF:
      eminsh(dst, src1, src2);
      break;
    default:
      assert(false, "%s", NodeClassNames[opcode]);
      break;
  }
}

// Emits the packed half-precision instruction that corresponds to the given C2 vector node
// opcode, with both sources in registers. Handles Op_AddVHF, Op_SubVHF, Op_MulVHF, Op_DivVHF,
// Op_MaxVHF and Op_MinVHF; any other opcode trips the assert, which prints the node class name.
// NOTE(review): if assert is compiled out in product builds, an unexpected opcode would emit
// nothing - confirm callers only pass the opcodes listed above.
void C2_MacroAssembler::evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc) {
  switch (opcode) {
    case Op_AddVHF:
      evaddph(dst, src1, src2, vlen_enc);
      break;
    case Op_SubVHF:
      evsubph(dst, src1, src2, vlen_enc);
      break;
    case Op_MulVHF:
      evmulph(dst, src1, src2, vlen_enc);
      break;
    case Op_DivVHF:
      evdivph(dst, src1, src2, vlen_enc);
      break;
    case Op_MaxVHF:
      evmaxph(dst, src1, src2, vlen_enc);
      break;
    case Op_MinVHF:
      evminph(dst, src1, src2, vlen_enc);
      break;
    default:
      assert(false, "%s", NodeClassNames[opcode]);
      break;
  }
}

// Emits the packed half-precision instruction that corresponds to the given C2 vector node
// opcode, with the second source taken from memory. Handles Op_AddVHF, Op_SubVHF, Op_MulVHF,
// Op_DivVHF, Op_MaxVHF and Op_MinVHF; any other opcode trips the assert, which prints the
// node class name.
// NOTE(review): if assert is compiled out in product builds, an unexpected opcode would emit
// nothing - confirm callers only pass the opcodes listed above.
void C2_MacroAssembler::evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc) {
  switch (opcode) {
    case Op_AddVHF:
      evaddph(dst, src1, src2, vlen_enc);
      break;
    case Op_SubVHF:
      evsubph(dst, src1, src2, vlen_enc);
      break;
    case Op_MulVHF:
      evmulph(dst, src1, src2, vlen_enc);
      break;
    case Op_DivVHF:
      evdivph(dst, src1, src2, vlen_enc);
      break;
    case Op_MaxVHF:
      evmaxph(dst, src1, src2, vlen_enc);
      break;
    case Op_MinVHF:
      evminph(dst, src1, src2, vlen_enc);
      break;
    default:
      assert(false, "%s", NodeClassNames[opcode]);
      break;
  }
}
5 changes: 5 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,11 @@
void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
XMMRegister src, int vlen_enc);

// Emits the scalar FP16 instruction selected by a C2 node opcode (Op_AddHF, Op_SubHF,
// Op_MulHF, Op_DivHF, Op_MaxHF or Op_MinHF).
void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);

// Emits the packed FP16 instruction selected by a C2 vector node opcode (Op_AddVHF, Op_SubVHF,
// Op_MulVHF, Op_DivVHF, Op_MaxVHF or Op_MinVHF); both sources are registers.
void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

// Same opcode dispatch as the register overload, with src2 taken from memory.
void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);

void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register offset,
Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
Expand Down
Loading

0 comments on commit c5536c5

Please sign in to comment.