Skip to content

Commit af4f638

Browse files
authored
Merge branch 'openjdk:master' into master
2 parents b208281 + 075ddef commit af4f638

File tree

68 files changed

+2132
-742
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+2132
-742
lines changed

src/hotspot/cpu/riscv/assembler_riscv.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1988,6 +1988,7 @@ enum VectorMask {
19881988

19891989
// Vector Narrowing Integer Right Shift Instructions
19901990
INSN(vnsra_wi, 0b1010111, 0b011, 0b101101);
1991+
INSN(vnsrl_wi, 0b1010111, 0b011, 0b101100);
19911992

19921993
#undef INSN
19931994

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2248,41 +2248,80 @@ static void float_to_float16_v_slow_path(C2_MacroAssembler& masm,
22482248
#define __ masm.
22492249
VectorRegister dst = stub.data<0>();
22502250
VectorRegister src = stub.data<1>();
2251-
VectorRegister tmp = stub.data<2>();
2251+
VectorRegister vtmp = stub.data<2>();
2252+
assert_different_registers(dst, src, vtmp);
2253+
22522254
__ bind(stub.entry());
22532255

2256+
// Active elements (NaNs) are marked in v0 mask register.
22542257
// mul is already set to mf2 in float_to_float16_v.
22552258

2256-
// preserve the payloads of non-canonical NaNs.
2257-
__ vnsra_wi(dst, src, 13, Assembler::v0_t);
2258-
2259-
// preserve the sign bit.
2260-
__ vnsra_wi(tmp, src, 26, Assembler::v0_t);
2261-
__ vsll_vi(tmp, tmp, 10, Assembler::v0_t);
2262-
__ mv(t0, 0x3ff);
2263-
__ vor_vx(tmp, tmp, t0, Assembler::v0_t);
2264-
2265-
// get the result by merging sign bit and payloads of preserved non-canonical NaNs.
2266-
__ vand_vv(dst, dst, tmp, Assembler::v0_t);
2259+
// Float (32 bits)
2260+
// Bit: 31 30 to 23 22 to 0
2261+
// +---+------------------+-----------------------------+
2262+
// | S | Exponent | Mantissa (Fraction) |
2263+
// +---+------------------+-----------------------------+
2264+
// 1 bit 8 bits 23 bits
2265+
//
2266+
// Float (16 bits)
2267+
// Bit: 15 14 to 10 9 to 0
2268+
// +---+----------------+------------------+
2269+
// | S | Exponent | Mantissa |
2270+
// +---+----------------+------------------+
2271+
// 1 bit 5 bits 10 bits
2272+
const int fp_sign_bits = 1;
2273+
const int fp32_bits = 32;
2274+
const int fp32_mantissa_2nd_part_bits = 9;
2275+
const int fp32_mantissa_3rd_part_bits = 4;
2276+
const int fp16_exponent_bits = 5;
2277+
const int fp16_mantissa_bits = 10;
2278+
2279+
// preserve the sign bit and exponent, clear mantissa.
2280+
__ vnsra_wi(dst, src, fp32_bits - fp_sign_bits - fp16_exponent_bits, Assembler::v0_t);
2281+
__ vsll_vi(dst, dst, fp16_mantissa_bits, Assembler::v0_t);
2282+
2283+
// Preserve high order bit of float NaN in the
2284+
// binary16 result NaN (tenth bit); OR in remaining
2285+
// bits into lower 9 bits of binary16 significand.
2286+
// | (doppel & 0x007f_e000) >> 13 // 10 bits
2287+
// | (doppel & 0x0000_1ff0) >> 4 // 9 bits
2288+
// | (doppel & 0x0000_000f)); // 4 bits
2289+
//
2290+
// Check j.l.Float.floatToFloat16 for more information.
2291+
// 10 bits
2292+
__ vnsrl_wi(vtmp, src, fp32_mantissa_2nd_part_bits + fp32_mantissa_3rd_part_bits, Assembler::v0_t);
2293+
__ mv(t0, 0x3ff); // retain first part of mantissa in a float 32
2294+
__ vand_vx(vtmp, vtmp, t0, Assembler::v0_t);
2295+
__ vor_vv(dst, dst, vtmp, Assembler::v0_t);
2296+
// 9 bits
2297+
__ vnsrl_wi(vtmp, src, fp32_mantissa_3rd_part_bits, Assembler::v0_t);
2298+
__ mv(t0, 0x1ff); // retain second part of mantissa in a float 32
2299+
__ vand_vx(vtmp, vtmp, t0, Assembler::v0_t);
2300+
__ vor_vv(dst, dst, vtmp, Assembler::v0_t);
2301+
// 4 bits
2302+
// Narrow shift is necessary to move data from 32 bits element to 16 bits element in vector register.
2303+
__ vnsrl_wi(vtmp, src, 0, Assembler::v0_t);
2304+
__ vand_vi(vtmp, vtmp, 0xf, Assembler::v0_t);
2305+
__ vor_vv(dst, dst, vtmp, Assembler::v0_t);
22672306

22682307
__ j(stub.continuation());
22692308
#undef __
22702309
}
22712310

22722311
// j.l.Float.floatToFloat16
2273-
void C2_MacroAssembler::float_to_float16_v(VectorRegister dst, VectorRegister src, VectorRegister vtmp,
2274-
Register tmp, uint vector_length) {
2312+
void C2_MacroAssembler::float_to_float16_v(VectorRegister dst, VectorRegister src,
2313+
VectorRegister vtmp, Register tmp, uint vector_length) {
22752314
assert_different_registers(dst, src, vtmp);
22762315

22772316
auto stub = C2CodeStub::make<VectorRegister, VectorRegister, VectorRegister>
2278-
(dst, src, vtmp, 28, float_to_float16_v_slow_path);
2317+
(dst, src, vtmp, 56, float_to_float16_v_slow_path);
22792318

22802319
// On riscv, NaN needs a special process as vfncvt_f_f_w does not work in that case.
22812320

22822321
vsetvli_helper(BasicType::T_FLOAT, vector_length, Assembler::m1);
22832322

22842323
// check whether there is a NaN.
2285-
// replace v_fclass with vmseq_vv as performance optimization.
2324+
// replace v_fclass with vmfne_vv as performance optimization.
22862325
vmfne_vv(v0, src, src);
22872326
vcpop_m(t0, v0);
22882327

src/hotspot/cpu/x86/vm_version_x86.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3148,7 +3148,10 @@ uint VM_Version::cores_per_cpu() {
31483148
result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
31493149
}
31503150
} else if (is_amd_family()) {
3151-
result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3151+
result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3152+
if (cpu_family() >= 0x17) { // Zen or later
3153+
result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3154+
}
31523155
} else if (is_zx()) {
31533156
bool supports_topology = supports_processor_topology();
31543157
if (supports_topology) {

src/hotspot/cpu/x86/vm_version_x86.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ class VM_Version : public Abstract_VM_Version {
199199
union ExtCpuid8Ecx {
200200
uint32_t value;
201201
struct {
202-
uint32_t cores_per_cpu : 8,
203-
: 24;
202+
uint32_t threads_per_cpu : 8,
203+
: 24;
204204
} bits;
205205
};
206206

src/hotspot/os/linux/compilerThreadTimeout_linux.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ bool CompilerThreadTimeoutLinux::init_timeout() {
9494
JavaThread* thread = JavaThread::current();
9595

9696
// Create a POSIX timer sending SIGALRM to this thread only.
97-
sigevent_t sev;
97+
struct sigevent sev;
9898
sev.sigev_value.sival_ptr = nullptr;
9999
sev.sigev_signo = TIMEOUT_SIGNAL;
100100
sev.sigev_notify = SIGEV_THREAD_ID;

0 commit comments

Comments
 (0)