Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GR-60402] Backport to 24.2: [JDK-8346653] Add vzeroupper upon the entrance of AMD64 sha1 and sha256 stubs. #10373

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -24,8 +24,16 @@
*/
package jdk.graal.compiler.lir.amd64;

import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;
import static jdk.vm.ci.amd64.AMD64.xmm0;
import static jdk.vm.ci.amd64.AMD64.xmm1;
import static jdk.vm.ci.amd64.AMD64.xmm10;
import static jdk.vm.ci.amd64.AMD64.xmm11;
import static jdk.vm.ci.amd64.AMD64.xmm12;
import static jdk.vm.ci.amd64.AMD64.xmm13;
import static jdk.vm.ci.amd64.AMD64.xmm14;
import static jdk.vm.ci.amd64.AMD64.xmm15;
import static jdk.vm.ci.amd64.AMD64.xmm2;
import static jdk.vm.ci.amd64.AMD64.xmm3;
import static jdk.vm.ci.amd64.AMD64.xmm4;
Expand All @@ -35,20 +43,18 @@
import static jdk.vm.ci.amd64.AMD64.xmm8;
import static jdk.vm.ci.amd64.AMD64.xmm9;
import static jdk.vm.ci.code.ValueUtil.asRegister;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;

import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.core.amd64.AMD64LIRGenerator;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;

import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.AllocatableValue;
Expand Down Expand Up @@ -76,11 +82,11 @@ public final class AMD64SHA1Op extends AMD64LIRInstruction {
@Temp({OperandFlag.REG}) private Value[] temps;
private final boolean multiBlock;

public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) {
public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) {
this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false);
}

public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
public AMD64SHA1Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) {
super(TYPE);

Expand All @@ -92,18 +98,40 @@ public AMD64SHA1Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatable

this.multiBlock = multiBlock;

this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
};
if (tool.supportsCPUFeature(CPUFeature.AVX)) {
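// vzeroupper zeroes the upper bits (position 128 and above) of vector registers 0-15,
// so all of xmm0-xmm15 must be declared as temps when AVX is available.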
this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
xmm10.asValue(),
xmm11.asValue(),
xmm12.asValue(),
xmm13.asValue(),
xmm14.asValue(),
xmm15.asValue(),
};
} else {
this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
};
}

if (multiBlock) {
this.bufTempValue = tool.newVariable(bufValue.getValueKind());
Expand Down Expand Up @@ -168,6 +196,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
Label labelDoneHash = new Label();
Label labelLoop0 = new Label();

if (masm.supports(CPUFeature.AVX)) {
// Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
// previously executed AVX instructions and the following SHA-1 instructions.
masm.vzeroupper();
}

masm.movdqu(abcd, new AMD64Address(state, 0));
masm.pinsrd(e0, new AMD64Address(state, 16), 3);
masm.movdqu(shufMask, recordExternalAddress(crb, upperWordMask));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -24,9 +24,17 @@
*/
package jdk.graal.compiler.lir.amd64;

import static jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag.BelowEqual;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;
import static jdk.vm.ci.amd64.AMD64.xmm0;
import static jdk.vm.ci.amd64.AMD64.xmm1;
import static jdk.vm.ci.amd64.AMD64.xmm10;
import static jdk.vm.ci.amd64.AMD64.xmm11;
import static jdk.vm.ci.amd64.AMD64.xmm12;
import static jdk.vm.ci.amd64.AMD64.xmm13;
import static jdk.vm.ci.amd64.AMD64.xmm14;
import static jdk.vm.ci.amd64.AMD64.xmm15;
import static jdk.vm.ci.amd64.AMD64.xmm2;
import static jdk.vm.ci.amd64.AMD64.xmm3;
import static jdk.vm.ci.amd64.AMD64.xmm4;
Expand All @@ -36,20 +44,17 @@
import static jdk.vm.ci.amd64.AMD64.xmm8;
import static jdk.vm.ci.amd64.AMD64.xmm9;
import static jdk.vm.ci.code.ValueUtil.asRegister;
import static jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag.BelowEqual;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;

import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.core.amd64.AMD64LIRGenerator;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;

import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.AllocatableValue;
Expand Down Expand Up @@ -79,11 +84,11 @@ public final class AMD64SHA256Op extends AMD64LIRInstruction {

private final boolean multiBlock;

public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue) {
public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue) {
this(tool, bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, Value.ILLEGAL, false);
}

public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
public AMD64SHA256Op(AMD64LIRGenerator tool, AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue,
AllocatableValue limitValue, AllocatableValue resultValue, boolean multiBlock) {
super(TYPE);

Expand All @@ -97,19 +102,40 @@ public AMD64SHA256Op(LIRGeneratorTool tool, AllocatableValue bufValue, Allocatab

this.keyTempValue = tool.newVariable(bufValue.getValueKind());

this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
xmm10.asValue(),
};
if (tool.supportsCPUFeature(CPUFeature.AVX)) {
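// vzeroupper zeroes the upper bits (position 128 and above) of vector registers 0-15,
// so all of xmm0-xmm15 must be declared as temps when AVX is available.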
this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
xmm10.asValue(),
xmm11.asValue(),
xmm12.asValue(),
xmm13.asValue(),
xmm14.asValue(),
xmm15.asValue(),
};
} else {
this.temps = new Value[]{
xmm0.asValue(),
xmm1.asValue(),
xmm2.asValue(),
xmm3.asValue(),
xmm4.asValue(),
xmm5.asValue(),
xmm6.asValue(),
xmm7.asValue(),
xmm8.asValue(),
xmm9.asValue(),
};
}

if (multiBlock) {
this.bufTempValue = tool.newVariable(bufValue.getValueKind());
Expand Down Expand Up @@ -199,6 +225,12 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
// keyTemp replaces the hardcoded rax in the original stub.
Register keyTemp = asRegister(keyTempValue);

if (masm.supports(CPUFeature.AVX)) {
// Insert vzeroupper here to avoid performance penalty of SSE-AVX transition between
// previously executed AVX instructions and the following SHA-256 instructions.
masm.vzeroupper();
}

masm.movdqu(state0, new AMD64Address(state, 0));
masm.movdqu(state1, new AMD64Address(state, 16));

Expand Down
Loading