From 1aa7a9397a0f3c3d1502ba81e172994a99173446 Mon Sep 17 00:00:00 2001 From: Daco Harkes Date: Fri, 3 Feb 2023 12:55:31 +0000 Subject: [PATCH] [vm] `MemoryCopyInstr` optimize constant src and dest start When constants are passed in to the source and destination start, no registers are needed for these. The constants are directly compiled into the machine code. If the constant happens to be 0, no machine code is emitted at all. I did not measure any speed improvements. Likely the micro-code schedulers in the CPUs already noticed the no-ops. I have verified manually that we emit smaller machine code with these changes on x64. TEST=runtime/vm/compiler/backend/memory_copy_test.cc Bug: https://github.com/dart-lang/sdk/issues/51031 Change-Id: I70f12c9ae299b44a8f5007ca3a8c5ee56a9aff40 Cq-Include-Trybots: luci.dart.try:vm-precomp-ffi-qemu-linux-release-riscv64-try,vm-precomp-ffi-qemu-linux-release-arm-try,vm-ffi-android-debug-arm64c-try,vm-ffi-android-debug-arm-try,vm-kernel-nnbd-mac-debug-arm64-try,vm-kernel-nnbd-win-debug-x64-try,vm-kernel-win-debug-x64c-try,vm-kernel-win-debug-ia32-try,vm-kernel-nnbd-linux-debug-ia32-try,vm-reload-rollback-linux-debug-x64-try Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/279170 Reviewed-by: Alexander Markov --- runtime/vm/compiler/backend/il.h | 5 ++- runtime/vm/compiler/backend/il_arm.cc | 44 +++++++++++++++---------- runtime/vm/compiler/backend/il_arm64.cc | 44 +++++++++++++++---------- runtime/vm/compiler/backend/il_ia32.cc | 26 ++++++++++----- runtime/vm/compiler/backend/il_riscv.cc | 42 ++++++++++++++--------- runtime/vm/compiler/backend/il_x64.cc | 32 +++++++++++------- 6 files changed, 117 insertions(+), 76 deletions(-) diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h index 57e6e2cb8bed..4d96234c1c70 100644 --- a/runtime/vm/compiler/backend/il.h +++ b/runtime/vm/compiler/backend/il.h @@ -3085,12 +3085,11 @@ class MemoryCopyInstr : public TemplateInstruction<5, NoThrow> { private: // Set array_reg to point to the index indicated by start (contained in - // start_reg) of the typed data or string in array (contained in array_reg). + // start_loc) of the typed data or string in array (contained in array_reg). void EmitComputeStartPointer(FlowGraphCompiler* compiler, classid_t array_cid, - Value* start, Register array_reg, - Register start_reg); + Location start_loc); static bool IsArrayTypeSupported(classid_t array_cid) { if (IsTypedDataBaseClassId(array_cid)) { diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc index 377d69ee692d..19c4b6835741 100644 --- a/runtime/vm/compiler/backend/il_arm.cc +++ b/runtime/vm/compiler/backend/il_arm.cc @@ -165,8 +165,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone, LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall); locs->set_in(kSrcPos, Location::WritableRegister()); locs->set_in(kDestPos, Location::WritableRegister()); - locs->set_in(kSrcStartPos, Location::RequiresRegister()); - locs->set_in(kDestStartPos, Location::RequiresRegister()); + locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start())); + locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start())); locs->set_in(kLengthPos, Location::WritableRegister()); for (intptr_t i = 0; i < kNumTemps; i++) { locs->set_temp(i, Location::RequiresRegister()); @@ -177,8 +177,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone, void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { const Register src_reg = locs()->in(kSrcPos).reg(); const Register dest_reg = locs()->in(kDestPos).reg(); - const Register src_start_reg = locs()->in(kSrcStartPos).reg(); - const Register dest_start_reg = locs()->in(kDestStartPos).reg(); + const Location src_start_loc = locs()->in(kSrcStartPos); + const Location dest_start_loc = locs()->in(kDestStartPos); const Register length_reg = locs()->in(kLengthPos).reg(); const Register temp_reg = locs()->temp(0).reg(); @@ -187,10 +187,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { temp_regs |= 1 << locs()->temp(i).reg(); } - EmitComputeStartPointer(compiler, src_cid_, src_start(), src_reg, - src_start_reg); - EmitComputeStartPointer(compiler, dest_cid_, dest_start(), dest_reg, - dest_start_reg); + EmitComputeStartPointer(compiler, src_cid_, src_reg, src_start_loc); + EmitComputeStartPointer(compiler, dest_cid_, dest_reg, dest_start_loc); compiler::Label loop, done; @@ -230,44 +228,54 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler, classid_t array_cid, - Value* start, Register array_reg, - Register start_reg) { + Location start_loc) { + intptr_t offset; if (IsTypedDataBaseClassId(array_cid)) { __ ldr(array_reg, compiler::FieldAddress( array_reg, compiler::target::PointerBase::data_offset())); + offset = 0; } else { switch (array_cid) { case kOneByteStringCid: - __ add( - array_reg, array_reg, - compiler::Operand(compiler::target::OneByteString::data_offset() - - kHeapObjectTag)); + offset = + compiler::target::OneByteString::data_offset() - kHeapObjectTag; break; case kTwoByteStringCid: - __ add( - array_reg, array_reg, - compiler::Operand(compiler::target::OneByteString::data_offset() - - kHeapObjectTag)); + offset = + compiler::target::TwoByteString::data_offset() - kHeapObjectTag; break; case kExternalOneByteStringCid: __ ldr(array_reg, compiler::FieldAddress(array_reg, compiler::target::ExternalOneByteString:: external_data_offset())); + offset = 0; break; case kExternalTwoByteStringCid: __ ldr(array_reg, compiler::FieldAddress(array_reg, compiler::target::ExternalTwoByteString:: external_data_offset())); + offset = 0; break; default: UNREACHABLE(); break; } } + ASSERT(start_loc.IsRegister() || start_loc.IsConstant()); + if (start_loc.IsConstant()) { + const auto& constant = start_loc.constant(); + ASSERT(constant.IsInteger()); + const int64_t start_value = Integer::Cast(constant).AsInt64Value(); + const intptr_t add_value = start_value * element_size_ + offset; + __ AddImmediate(array_reg, add_value); + return; + } + __ AddImmediate(array_reg, offset); + const Register start_reg = start_loc.reg(); intptr_t shift = Utils::ShiftForPowerOfTwo(element_size_) - 1; if (shift < 0) { __ add(array_reg, array_reg, compiler::Operand(start_reg, ASR, -shift)); diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc index e1e0bb610040..b9313d5e41f8 100644 --- a/runtime/vm/compiler/backend/il_arm64.cc +++ b/runtime/vm/compiler/backend/il_arm64.cc @@ -162,8 +162,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone, LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall); locs->set_in(kSrcPos, Location::WritableRegister()); locs->set_in(kDestPos, Location::WritableRegister()); - locs->set_in(kSrcStartPos, Location::RequiresRegister()); - locs->set_in(kDestStartPos, Location::RequiresRegister()); + locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start())); + locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start())); locs->set_in(kLengthPos, Location::WritableRegister()); locs->set_temp(0, element_size_ == 16 ? Location::Pair(Location::RequiresRegister(), @@ -175,8 +175,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone, void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { const Register src_reg = locs()->in(kSrcPos).reg(); const Register dest_reg = locs()->in(kDestPos).reg(); - const Register src_start_reg = locs()->in(kSrcStartPos).reg(); - const Register dest_start_reg = locs()->in(kDestStartPos).reg(); + const Location src_start_loc = locs()->in(kSrcStartPos); + const Location dest_start_loc = locs()->in(kDestStartPos); const Register length_reg = locs()->in(kLengthPos).reg(); Register temp_reg, temp_reg2; @@ -189,10 +189,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { temp_reg2 = kNoRegister; } - EmitComputeStartPointer(compiler, src_cid_, src_start(), src_reg, - src_start_reg); - EmitComputeStartPointer(compiler, dest_cid_, dest_start(), dest_reg, - dest_start_reg); + EmitComputeStartPointer(compiler, src_cid_, src_reg, src_start_loc); + EmitComputeStartPointer(compiler, dest_cid_, dest_reg, dest_start_loc); compiler::Label loop, done; @@ -236,44 +234,54 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler, classid_t array_cid, - Value* start, Register array_reg, - Register start_reg) { + Location start_loc) { + intptr_t offset; if (IsTypedDataBaseClassId(array_cid)) { __ ldr(array_reg, compiler::FieldAddress( array_reg, compiler::target::PointerBase::data_offset())); + offset = 0; } else { switch (array_cid) { case kOneByteStringCid: - __ add( - array_reg, array_reg, - compiler::Operand(compiler::target::OneByteString::data_offset() - - kHeapObjectTag)); + offset = + compiler::target::OneByteString::data_offset() - kHeapObjectTag; break; case kTwoByteStringCid: - __ add( - array_reg, array_reg, - compiler::Operand(compiler::target::OneByteString::data_offset() - - kHeapObjectTag)); + offset = + compiler::target::TwoByteString::data_offset() - kHeapObjectTag; break; case kExternalOneByteStringCid: __ ldr(array_reg, compiler::FieldAddress(array_reg, compiler::target::ExternalOneByteString:: external_data_offset())); + offset = 0; break; case kExternalTwoByteStringCid: __ ldr(array_reg, compiler::FieldAddress(array_reg, compiler::target::ExternalTwoByteString:: external_data_offset())); + offset = 0; break; default: UNREACHABLE(); break; } } + ASSERT(start_loc.IsRegister() || start_loc.IsConstant()); + if (start_loc.IsConstant()) { + const auto& constant = start_loc.constant(); + ASSERT(constant.IsInteger()); + const int64_t start_value = Integer::Cast(constant).AsInt64Value(); + const intptr_t add_value = start_value * element_size_ + offset; + __ AddImmediate(array_reg, add_value); + return; + } + __ AddImmediate(array_reg, offset); + const Register start_reg = start_loc.reg(); intptr_t shift = Utils::ShiftForPowerOfTwo(element_size_) - 1; if (shift < 0) { #if defined(DART_COMPRESSED_POINTERS) diff --git a/runtime/vm/compiler/backend/il_ia32.cc b/runtime/vm/compiler/backend/il_ia32.cc index c06057ce0d62..fdaf91c67ac0 100644 --- a/runtime/vm/compiler/backend/il_ia32.cc +++ b/runtime/vm/compiler/backend/il_ia32.cc @@ -85,24 +85,23 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone, LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall); locs->set_in(kSrcPos, Location::RequiresRegister()); locs->set_in(kDestPos, Location::RegisterLocation(EDI)); - locs->set_in(kSrcStartPos, Location::WritableRegister()); - locs->set_in(kDestStartPos, Location::WritableRegister()); + locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start())); + locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start())); locs->set_in(kLengthPos, Location::RegisterLocation(ECX)); return locs; } void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { const Register src_reg = locs()->in(kSrcPos).reg(); - const Register src_start_reg = locs()->in(kSrcStartPos).reg(); - const Register dest_start_reg = locs()->in(kDestStartPos).reg(); + const Location src_start_loc = locs()->in(kSrcStartPos); + const Location dest_start_loc = locs()->in(kDestStartPos); // Save ESI which is THR. __ pushl(ESI); __ movl(ESI, src_reg); - EmitComputeStartPointer(compiler, src_cid_, src_start(), ESI, src_start_reg); - EmitComputeStartPointer(compiler, dest_cid_, dest_start(), EDI, - dest_start_reg); + EmitComputeStartPointer(compiler, src_cid_, ESI, src_start_loc); + EmitComputeStartPointer(compiler, dest_cid_, EDI, dest_start_loc); if (element_size_ <= 4) { __ SmiUntag(ECX); } else if (element_size_ == 16) { @@ -128,9 +127,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler, classid_t array_cid, - Value* start, Register array_reg, - Register start_reg) { + Location start_loc) { intptr_t offset; if (IsTypedDataBaseClassId(array_cid)) { __ movl(array_reg, @@ -166,6 +164,16 @@ void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler, break; } } + ASSERT(start_loc.IsRegister() || start_loc.IsConstant()); + if (start_loc.IsConstant()) { + const auto& constant = start_loc.constant(); + ASSERT(constant.IsInteger()); + const int64_t start_value = Integer::Cast(constant).AsInt64Value(); + const intptr_t add_value = start_value * element_size_ + offset; + __ AddImmediate(array_reg, add_value); + return; + } + const Register start_reg = start_loc.reg(); ScaleFactor scale; switch (element_size_) { case 1: diff --git a/runtime/vm/compiler/backend/il_riscv.cc b/runtime/vm/compiler/backend/il_riscv.cc index 85cecbdf342f..0bb628cad5c8 100644 --- a/runtime/vm/compiler/backend/il_riscv.cc +++ b/runtime/vm/compiler/backend/il_riscv.cc @@ -179,8 +179,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone, LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall); locs->set_in(kSrcPos, Location::WritableRegister()); locs->set_in(kDestPos, Location::WritableRegister()); - locs->set_in(kSrcStartPos, Location::RequiresRegister()); - locs->set_in(kDestStartPos, Location::RequiresRegister()); + locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start())); + locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start())); locs->set_in(kLengthPos, Location::WritableRegister()); return locs; } @@ -188,14 +188,12 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone, void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { const Register src_reg = locs()->in(kSrcPos).reg(); const Register dest_reg = locs()->in(kDestPos).reg(); - const Register src_start_reg = locs()->in(kSrcStartPos).reg(); - const Register dest_start_reg = locs()->in(kDestStartPos).reg(); + const Location src_start_loc = locs()->in(kSrcStartPos); + const Location dest_start_loc = locs()->in(kDestStartPos); const Register length_reg = locs()->in(kLengthPos).reg(); - EmitComputeStartPointer(compiler, src_cid_, src_start(), src_reg, - src_start_reg); - EmitComputeStartPointer(compiler, dest_cid_, dest_start(), dest_reg, - dest_start_reg); + EmitComputeStartPointer(compiler, src_cid_, src_reg, src_start_loc); + EmitComputeStartPointer(compiler, dest_cid_, dest_reg, dest_start_loc); compiler::Label loop, done; @@ -272,42 +270,54 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler, classid_t array_cid, - Value* start, Register array_reg, - Register start_reg) { + Location start_loc) { + intptr_t offset; if (IsTypedDataBaseClassId(array_cid)) { __ lx(array_reg, compiler::FieldAddress(array_reg, compiler::target::PointerBase::data_offset())); + offset = 0; } else { switch (array_cid) { case kOneByteStringCid: - __ addi( - array_reg, array_reg, - compiler::target::OneByteString::data_offset() - kHeapObjectTag); + offset = + compiler::target::OneByteString::data_offset() - kHeapObjectTag; break; case kTwoByteStringCid: - __ addi( - array_reg, array_reg, - compiler::target::OneByteString::data_offset() - kHeapObjectTag); + offset = + compiler::target::TwoByteString::data_offset() - kHeapObjectTag; break; case kExternalOneByteStringCid: __ lx(array_reg, compiler::FieldAddress(array_reg, compiler::target::ExternalOneByteString:: external_data_offset())); + offset = 0; break; case kExternalTwoByteStringCid: __ lx(array_reg, compiler::FieldAddress(array_reg, compiler::target::ExternalTwoByteString:: external_data_offset())); + offset = 0; break; default: UNREACHABLE(); break; } } + ASSERT(start_loc.IsRegister() || start_loc.IsConstant()); + if (start_loc.IsConstant()) { + const auto& constant = start_loc.constant(); + ASSERT(constant.IsInteger()); + const int64_t start_value = Integer::Cast(constant).AsInt64Value(); + const intptr_t add_value = start_value * element_size_ + offset; + __ AddImmediate(array_reg, add_value); + return; + } + __ AddImmediate(array_reg, offset); + const Register start_reg = start_loc.reg(); intptr_t shift = Utils::ShiftForPowerOfTwo(element_size_) - 1; if (shift < 0) { __ srai(TMP, start_reg, -shift); diff --git a/runtime/vm/compiler/backend/il_x64.cc b/runtime/vm/compiler/backend/il_x64.cc index 93223757a9b3..a6400b5e5763 100644 --- a/runtime/vm/compiler/backend/il_x64.cc +++ b/runtime/vm/compiler/backend/il_x64.cc @@ -160,19 +160,18 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone, LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall); locs->set_in(kSrcPos, Location::RegisterLocation(RSI)); locs->set_in(kDestPos, Location::RegisterLocation(RDI)); - locs->set_in(kSrcStartPos, Location::WritableRegister()); - locs->set_in(kDestStartPos, Location::WritableRegister()); + locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start())); + locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start())); locs->set_in(kLengthPos, Location::RegisterLocation(RCX)); return locs; } void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { - const Register src_start_reg = locs()->in(kSrcStartPos).reg(); - const Register dest_start_reg = locs()->in(kDestStartPos).reg(); + const Location src_start_loc = locs()->in(kSrcStartPos); + const Location dest_start_loc = locs()->in(kDestStartPos); - EmitComputeStartPointer(compiler, src_cid_, src_start(), RSI, src_start_reg); - EmitComputeStartPointer(compiler, dest_cid_, dest_start(), RDI, - dest_start_reg); + EmitComputeStartPointer(compiler, src_cid_, RSI, src_start_loc); + EmitComputeStartPointer(compiler, dest_cid_, RDI, dest_start_loc); if (element_size_ <= 8) { __ SmiUntag(RCX); } else { @@ -199,9 +198,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) { void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler, classid_t array_cid, - Value* start, Register array_reg, - Register start_reg) { + Location start_loc) { intptr_t offset; if (IsTypedDataBaseClassId(array_cid)) { __ movq(array_reg, @@ -237,6 +235,16 @@ void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler, break; } } + ASSERT(start_loc.IsRegister() || start_loc.IsConstant()); + if (start_loc.IsConstant()) { + const auto& constant = start_loc.constant(); + ASSERT(constant.IsInteger()); + const int64_t start_value = Integer::Cast(constant).AsInt64Value(); + const intptr_t add_value = start_value * element_size_ + offset; + __ AddImmediate(array_reg, add_value); + return; + } + const Register start_reg = start_loc.reg(); ScaleFactor scale; switch (element_size_) { case 1: @@ -936,7 +944,7 @@ static Condition EmitNullAwareInt64ComparisonOp(FlowGraphCompiler* compiler, __ OBJ(cmp)(left, right); __ j(EQUAL, equal_result); __ OBJ(mov)(TMP, left); - __ OBJ(and)(TMP, right); + __ OBJ (and)(TMP, right); __ BranchIfSmi(TMP, not_equal_result); __ CompareClassId(left, kMintCid); __ j(NOT_EQUAL, not_equal_result); @@ -4036,8 +4044,8 @@ LocationSummary* BoxInt64Instr::MakeLocationSummary(Zone* zone, object_store->allocate_mint_without_fpu_regs_stub() ->untag() ->InVMIsolateHeap(); - const bool shared_slow_path_call = SlowPathSharingSupported(opt) && - !stubs_in_vm_isolate; + const bool shared_slow_path_call = + SlowPathSharingSupported(opt) && !stubs_in_vm_isolate; LocationSummary* summary = new (zone) LocationSummary( zone, kNumInputs, kNumTemps, ValueFitsSmi()