Skip to content

Commit

Permalink
[vm] MemoryCopyInstr optimize constant src and dest start
Browse files Browse the repository at this point in the history
When constants are passed in to the source and destination start,
no registers are needed for these. The constants are directly compiled
into the machine code. If the constant happens to be 0, no machine
code is emitted at all.

I did not measure any speed improvements. Likely the micro-code
schedulers in the CPUs already noticed the no-ops.

I have verified manually that we emit smaller machine code with
these changes on x64.

TEST=runtime/vm/compiler/backend/memory_copy_test.cc

Bug: #51031

Change-Id: I70f12c9ae299b44a8f5007ca3a8c5ee56a9aff40
Cq-Include-Trybots: luci.dart.try:vm-precomp-ffi-qemu-linux-release-riscv64-try,vm-precomp-ffi-qemu-linux-release-arm-try,vm-ffi-android-debug-arm64c-try,vm-ffi-android-debug-arm-try,vm-kernel-nnbd-mac-debug-arm64-try,vm-kernel-nnbd-win-debug-x64-try,vm-kernel-win-debug-x64c-try,vm-kernel-win-debug-ia32-try,vm-kernel-nnbd-linux-debug-ia32-try,vm-reload-rollback-linux-debug-x64-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/279170
Reviewed-by: Alexander Markov <alexmarkov@google.com>
  • Loading branch information
dcharkes authored and Commit Queue committed Feb 3, 2023
1 parent bf7d643 commit 1aa7a93
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 76 deletions.
5 changes: 2 additions & 3 deletions runtime/vm/compiler/backend/il.h
Original file line number Diff line number Diff line change
Expand Up @@ -3085,12 +3085,11 @@ class MemoryCopyInstr : public TemplateInstruction<5, NoThrow> {

private:
// Set array_reg to point to the index indicated by start (contained in
// start_reg) of the typed data or string in array (contained in array_reg).
// start_loc) of the typed data or string in array (contained in array_reg).
void EmitComputeStartPointer(FlowGraphCompiler* compiler,
classid_t array_cid,
Value* start,
Register array_reg,
Register start_reg);
Location start_loc);

static bool IsArrayTypeSupported(classid_t array_cid) {
if (IsTypedDataBaseClassId(array_cid)) {
Expand Down
44 changes: 26 additions & 18 deletions runtime/vm/compiler/backend/il_arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
locs->set_in(kSrcPos, Location::WritableRegister());
locs->set_in(kDestPos, Location::WritableRegister());
locs->set_in(kSrcStartPos, Location::RequiresRegister());
locs->set_in(kDestStartPos, Location::RequiresRegister());
locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start()));
locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start()));
locs->set_in(kLengthPos, Location::WritableRegister());
for (intptr_t i = 0; i < kNumTemps; i++) {
locs->set_temp(i, Location::RequiresRegister());
Expand All @@ -177,8 +177,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const Register src_reg = locs()->in(kSrcPos).reg();
const Register dest_reg = locs()->in(kDestPos).reg();
const Register src_start_reg = locs()->in(kSrcStartPos).reg();
const Register dest_start_reg = locs()->in(kDestStartPos).reg();
const Location src_start_loc = locs()->in(kSrcStartPos);
const Location dest_start_loc = locs()->in(kDestStartPos);
const Register length_reg = locs()->in(kLengthPos).reg();

const Register temp_reg = locs()->temp(0).reg();
Expand All @@ -187,10 +187,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
temp_regs |= 1 << locs()->temp(i).reg();
}

EmitComputeStartPointer(compiler, src_cid_, src_start(), src_reg,
src_start_reg);
EmitComputeStartPointer(compiler, dest_cid_, dest_start(), dest_reg,
dest_start_reg);
EmitComputeStartPointer(compiler, src_cid_, src_reg, src_start_loc);
EmitComputeStartPointer(compiler, dest_cid_, dest_reg, dest_start_loc);

compiler::Label loop, done;

Expand Down Expand Up @@ -230,44 +228,54 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {

void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
classid_t array_cid,
Value* start,
Register array_reg,
Register start_reg) {
Location start_loc) {
intptr_t offset;
if (IsTypedDataBaseClassId(array_cid)) {
__ ldr(array_reg,
compiler::FieldAddress(
array_reg, compiler::target::PointerBase::data_offset()));
offset = 0;
} else {
switch (array_cid) {
case kOneByteStringCid:
__ add(
array_reg, array_reg,
compiler::Operand(compiler::target::OneByteString::data_offset() -
kHeapObjectTag));
offset =
compiler::target::OneByteString::data_offset() - kHeapObjectTag;
break;
case kTwoByteStringCid:
__ add(
array_reg, array_reg,
compiler::Operand(compiler::target::OneByteString::data_offset() -
kHeapObjectTag));
offset =
compiler::target::TwoByteString::data_offset() - kHeapObjectTag;
break;
case kExternalOneByteStringCid:
__ ldr(array_reg,
compiler::FieldAddress(array_reg,
compiler::target::ExternalOneByteString::
external_data_offset()));
offset = 0;
break;
case kExternalTwoByteStringCid:
__ ldr(array_reg,
compiler::FieldAddress(array_reg,
compiler::target::ExternalTwoByteString::
external_data_offset()));
offset = 0;
break;
default:
UNREACHABLE();
break;
}
}
ASSERT(start_loc.IsRegister() || start_loc.IsConstant());
if (start_loc.IsConstant()) {
const auto& constant = start_loc.constant();
ASSERT(constant.IsInteger());
const int64_t start_value = Integer::Cast(constant).AsInt64Value();
const intptr_t add_value = start_value * element_size_ + offset;
__ AddImmediate(array_reg, add_value);
return;
}
__ AddImmediate(array_reg, offset);
const Register start_reg = start_loc.reg();
intptr_t shift = Utils::ShiftForPowerOfTwo(element_size_) - 1;
if (shift < 0) {
__ add(array_reg, array_reg, compiler::Operand(start_reg, ASR, -shift));
Expand Down
44 changes: 26 additions & 18 deletions runtime/vm/compiler/backend/il_arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
locs->set_in(kSrcPos, Location::WritableRegister());
locs->set_in(kDestPos, Location::WritableRegister());
locs->set_in(kSrcStartPos, Location::RequiresRegister());
locs->set_in(kDestStartPos, Location::RequiresRegister());
locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start()));
locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start()));
locs->set_in(kLengthPos, Location::WritableRegister());
locs->set_temp(0, element_size_ == 16
? Location::Pair(Location::RequiresRegister(),
Expand All @@ -175,8 +175,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const Register src_reg = locs()->in(kSrcPos).reg();
const Register dest_reg = locs()->in(kDestPos).reg();
const Register src_start_reg = locs()->in(kSrcStartPos).reg();
const Register dest_start_reg = locs()->in(kDestStartPos).reg();
const Location src_start_loc = locs()->in(kSrcStartPos);
const Location dest_start_loc = locs()->in(kDestStartPos);
const Register length_reg = locs()->in(kLengthPos).reg();

Register temp_reg, temp_reg2;
Expand All @@ -189,10 +189,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
temp_reg2 = kNoRegister;
}

EmitComputeStartPointer(compiler, src_cid_, src_start(), src_reg,
src_start_reg);
EmitComputeStartPointer(compiler, dest_cid_, dest_start(), dest_reg,
dest_start_reg);
EmitComputeStartPointer(compiler, src_cid_, src_reg, src_start_loc);
EmitComputeStartPointer(compiler, dest_cid_, dest_reg, dest_start_loc);

compiler::Label loop, done;

Expand Down Expand Up @@ -236,44 +234,54 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {

void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
classid_t array_cid,
Value* start,
Register array_reg,
Register start_reg) {
Location start_loc) {
intptr_t offset;
if (IsTypedDataBaseClassId(array_cid)) {
__ ldr(array_reg,
compiler::FieldAddress(
array_reg, compiler::target::PointerBase::data_offset()));
offset = 0;
} else {
switch (array_cid) {
case kOneByteStringCid:
__ add(
array_reg, array_reg,
compiler::Operand(compiler::target::OneByteString::data_offset() -
kHeapObjectTag));
offset =
compiler::target::OneByteString::data_offset() - kHeapObjectTag;
break;
case kTwoByteStringCid:
__ add(
array_reg, array_reg,
compiler::Operand(compiler::target::OneByteString::data_offset() -
kHeapObjectTag));
offset =
compiler::target::TwoByteString::data_offset() - kHeapObjectTag;
break;
case kExternalOneByteStringCid:
__ ldr(array_reg,
compiler::FieldAddress(array_reg,
compiler::target::ExternalOneByteString::
external_data_offset()));
offset = 0;
break;
case kExternalTwoByteStringCid:
__ ldr(array_reg,
compiler::FieldAddress(array_reg,
compiler::target::ExternalTwoByteString::
external_data_offset()));
offset = 0;
break;
default:
UNREACHABLE();
break;
}
}
ASSERT(start_loc.IsRegister() || start_loc.IsConstant());
if (start_loc.IsConstant()) {
const auto& constant = start_loc.constant();
ASSERT(constant.IsInteger());
const int64_t start_value = Integer::Cast(constant).AsInt64Value();
const intptr_t add_value = start_value * element_size_ + offset;
__ AddImmediate(array_reg, add_value);
return;
}
__ AddImmediate(array_reg, offset);
const Register start_reg = start_loc.reg();
intptr_t shift = Utils::ShiftForPowerOfTwo(element_size_) - 1;
if (shift < 0) {
#if defined(DART_COMPRESSED_POINTERS)
Expand Down
26 changes: 17 additions & 9 deletions runtime/vm/compiler/backend/il_ia32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,23 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
locs->set_in(kSrcPos, Location::RequiresRegister());
locs->set_in(kDestPos, Location::RegisterLocation(EDI));
locs->set_in(kSrcStartPos, Location::WritableRegister());
locs->set_in(kDestStartPos, Location::WritableRegister());
locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start()));
locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start()));
locs->set_in(kLengthPos, Location::RegisterLocation(ECX));
return locs;
}

void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const Register src_reg = locs()->in(kSrcPos).reg();
const Register src_start_reg = locs()->in(kSrcStartPos).reg();
const Register dest_start_reg = locs()->in(kDestStartPos).reg();
const Location src_start_loc = locs()->in(kSrcStartPos);
const Location dest_start_loc = locs()->in(kDestStartPos);

// Save ESI which is THR.
__ pushl(ESI);
__ movl(ESI, src_reg);

EmitComputeStartPointer(compiler, src_cid_, src_start(), ESI, src_start_reg);
EmitComputeStartPointer(compiler, dest_cid_, dest_start(), EDI,
dest_start_reg);
EmitComputeStartPointer(compiler, src_cid_, ESI, src_start_loc);
EmitComputeStartPointer(compiler, dest_cid_, EDI, dest_start_loc);
if (element_size_ <= 4) {
__ SmiUntag(ECX);
} else if (element_size_ == 16) {
Expand All @@ -128,9 +127,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {

void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
classid_t array_cid,
Value* start,
Register array_reg,
Register start_reg) {
Location start_loc) {
intptr_t offset;
if (IsTypedDataBaseClassId(array_cid)) {
__ movl(array_reg,
Expand Down Expand Up @@ -166,6 +164,16 @@ void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
break;
}
}
ASSERT(start_loc.IsRegister() || start_loc.IsConstant());
if (start_loc.IsConstant()) {
const auto& constant = start_loc.constant();
ASSERT(constant.IsInteger());
const int64_t start_value = Integer::Cast(constant).AsInt64Value();
const intptr_t add_value = start_value * element_size_ + offset;
__ AddImmediate(array_reg, add_value);
return;
}
const Register start_reg = start_loc.reg();
ScaleFactor scale;
switch (element_size_) {
case 1:
Expand Down
42 changes: 26 additions & 16 deletions runtime/vm/compiler/backend/il_riscv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,23 +179,21 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
locs->set_in(kSrcPos, Location::WritableRegister());
locs->set_in(kDestPos, Location::WritableRegister());
locs->set_in(kSrcStartPos, Location::RequiresRegister());
locs->set_in(kDestStartPos, Location::RequiresRegister());
locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start()));
locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start()));
locs->set_in(kLengthPos, Location::WritableRegister());
return locs;
}

void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const Register src_reg = locs()->in(kSrcPos).reg();
const Register dest_reg = locs()->in(kDestPos).reg();
const Register src_start_reg = locs()->in(kSrcStartPos).reg();
const Register dest_start_reg = locs()->in(kDestStartPos).reg();
const Location src_start_loc = locs()->in(kSrcStartPos);
const Location dest_start_loc = locs()->in(kDestStartPos);
const Register length_reg = locs()->in(kLengthPos).reg();

EmitComputeStartPointer(compiler, src_cid_, src_start(), src_reg,
src_start_reg);
EmitComputeStartPointer(compiler, dest_cid_, dest_start(), dest_reg,
dest_start_reg);
EmitComputeStartPointer(compiler, src_cid_, src_reg, src_start_loc);
EmitComputeStartPointer(compiler, dest_cid_, dest_reg, dest_start_loc);

compiler::Label loop, done;

Expand Down Expand Up @@ -272,42 +270,54 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {

void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
classid_t array_cid,
Value* start,
Register array_reg,
Register start_reg) {
Location start_loc) {
intptr_t offset;
if (IsTypedDataBaseClassId(array_cid)) {
__ lx(array_reg,
compiler::FieldAddress(array_reg,
compiler::target::PointerBase::data_offset()));
offset = 0;
} else {
switch (array_cid) {
case kOneByteStringCid:
__ addi(
array_reg, array_reg,
compiler::target::OneByteString::data_offset() - kHeapObjectTag);
offset =
compiler::target::OneByteString::data_offset() - kHeapObjectTag;
break;
case kTwoByteStringCid:
__ addi(
array_reg, array_reg,
compiler::target::OneByteString::data_offset() - kHeapObjectTag);
offset =
compiler::target::TwoByteString::data_offset() - kHeapObjectTag;
break;
case kExternalOneByteStringCid:
__ lx(array_reg,
compiler::FieldAddress(array_reg,
compiler::target::ExternalOneByteString::
external_data_offset()));
offset = 0;
break;
case kExternalTwoByteStringCid:
__ lx(array_reg,
compiler::FieldAddress(array_reg,
compiler::target::ExternalTwoByteString::
external_data_offset()));
offset = 0;
break;
default:
UNREACHABLE();
break;
}
}
ASSERT(start_loc.IsRegister() || start_loc.IsConstant());
if (start_loc.IsConstant()) {
const auto& constant = start_loc.constant();
ASSERT(constant.IsInteger());
const int64_t start_value = Integer::Cast(constant).AsInt64Value();
const intptr_t add_value = start_value * element_size_ + offset;
__ AddImmediate(array_reg, add_value);
return;
}
__ AddImmediate(array_reg, offset);
const Register start_reg = start_loc.reg();
intptr_t shift = Utils::ShiftForPowerOfTwo(element_size_) - 1;
if (shift < 0) {
__ srai(TMP, start_reg, -shift);
Expand Down
Loading

0 comments on commit 1aa7a93

Please sign in to comment.