Skip to content

Commit

Permalink
Support split repeated fields.
Browse files Browse the repository at this point in the history
Design details:
- We store the split repeated fields inline in the Split struct by pointer. We use pointers so that we can save memory when the fields are absent and so that the split struct will still be trivially copyable.
- If we had instead stored the repeated fields by value, then we would need to add virtual functions to each Message that would call the constructors of the repeated fields which we would call during parsing after allocating non-default split structs.

PiperOrigin-RevId: 549673919
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Jul 20, 2023
1 parent 4bdd53a commit 5b5e5bf
Show file tree
Hide file tree
Showing 18 changed files with 686 additions and 215 deletions.
134 changes: 105 additions & 29 deletions src/google/protobuf/compiler/cpp/field_generators/enum_field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -242,15 +242,22 @@ class RepeatedEnum : public FieldGeneratorBase {
field_(field),
opts_(&opts),
has_cached_size_(field_->is_packed() &&
HasGeneratedMethods(field_->file(), opts)) {}
HasGeneratedMethods(field_->file(), opts) &&
!ShouldSplit(descriptor_, options_)) {}
~RepeatedEnum() override = default;

std::vector<Sub> MakeVars() const override { return Vars(field_, *opts_); }

void GeneratePrivateMembers(io::Printer* p) const override {
p->Emit(R"cc(
$pb$::RepeatedField<int> $name$_;
)cc");
if (ShouldSplit(descriptor_, options_)) {
p->Emit(R"cc(
$pbi$::RawPtr<$pb$::RepeatedField<int>> $name$_;
)cc");
} else {
p->Emit(R"cc(
$pb$::RepeatedField<int> $name$_;
)cc");
}

if (has_cached_size_) {
p->Emit(R"cc(
Expand All @@ -266,9 +273,22 @@ class RepeatedEnum : public FieldGeneratorBase {
}

void GenerateMergingCode(io::Printer* p) const override {
p->Emit(R"cc(
_this->_internal_mutable_$name$()->MergeFrom(from._internal_$name$());
)cc");
// TODO(b/239716377): experiment with simplifying this to be
// `if (!from.empty()) { body(); }` for both split and non-split cases.
auto body = [&] {
p->Emit(R"cc(
_this->_internal_mutable_$name$()->MergeFrom(from._internal_$name$());
)cc");
};
if (!ShouldSplit(descriptor_, options_)) {
body();
} else {
p->Emit({{"body", body}}, R"cc(
if (!from.$field_$.IsDefault()) {
$body$;
}
)cc");
}
}

void GenerateSwappingCode(io::Printer* p) const override {
Expand All @@ -279,9 +299,15 @@ class RepeatedEnum : public FieldGeneratorBase {
}

void GenerateDestructorCode(io::Printer* p) const override {
p->Emit(R"cc(
_internal_mutable_$name$()->~RepeatedField();
)cc");
if (ShouldSplit(descriptor_, options_)) {
p->Emit(R"cc(
$field_$.DeleteIfNotDefault();
)cc");
} else {
p->Emit(R"cc(
_internal_mutable_$name$()->~RepeatedField();
)cc");
}
}

void GenerateConstexprAggregateInitializer(io::Printer* p) const override {
Expand Down Expand Up @@ -321,7 +347,13 @@ class RepeatedEnum : public FieldGeneratorBase {
}

void GenerateCopyConstructorCode(io::Printer* p) const override {
ABSL_CHECK(!ShouldSplit(field_, *opts_));
if (ShouldSplit(descriptor_, options_)) {
p->Emit(R"cc(
if (!from._internal_$name$().empty()) {
_internal_mutable_$name$()->MergeFrom(from._internal_$name$());
}
)cc");
}
}

void GenerateConstructorCode(io::Printer* p) const override {}
Expand Down Expand Up @@ -392,29 +424,69 @@ void RepeatedEnum::GenerateInlineAccessorDefinitions(io::Printer* p) const {
$TsanDetectConcurrentMutation$;
return _internal_mutable_$name$();
}
inline const $pb$::RepeatedField<int>& $Msg$::_internal_$name$() const {
$TsanDetectConcurrentRead$;
return $field_$;
}
inline $pb$::RepeatedField<int>* $Msg$::_internal_mutable_$name$() {
$TsanDetectConcurrentRead$;
return &$field_$;
}
)cc");
if (ShouldSplit(descriptor_, options_)) {
p->Emit(R"cc(
inline const $pb$::RepeatedField<int>& $Msg$::_internal_$name$() const {
$TsanDetectConcurrentRead$;
return *$field_$;
}
inline $pb$::RepeatedField<int>* $Msg$::_internal_mutable_$name$() {
$TsanDetectConcurrentRead$;
$PrepareSplitMessageForWrite$;
if ($field_$.IsDefault()) {
$field_$.Set($pb$::Arena::CreateMessage<$pb$::RepeatedField<int>>(
GetArenaForAllocation()));
}
return $field_$.Get();
}
)cc");
} else {
p->Emit(R"cc(
inline const $pb$::RepeatedField<int>& $Msg$::_internal_$name$() const {
$TsanDetectConcurrentRead$;
return $field_$;
}
inline $pb$::RepeatedField<int>* $Msg$::_internal_mutable_$name$() {
$TsanDetectConcurrentRead$;
return &$field_$;
}
)cc");
}
}

void RepeatedEnum::GenerateSerializeWithCachedSizesToArray(
io::Printer* p) const {
if (field_->is_packed()) {
p->Emit(R"cc(
{
int byte_size = $cached_size_$.Get();
if (byte_size > 0) {
target = stream->WriteEnumPacked($number$, _internal_$name$(),
byte_size, target);
}
}
)cc");
p->Emit(
{
{"byte_size",
[&] {
if (has_cached_size_) {
p->Emit(
R"cc(std::size_t byte_size = $cached_size_$.Get();)cc");
} else {
p->Emit(R"cc(
std::size_t byte_size = 0;
auto count = static_cast<std::size_t>(this->_internal_$name$_size());
for (std::size_t i = 0; i < count; ++i) {
byte_size += ::_pbi::WireFormatLite::EnumSize(
this->_internal_$name$().Get(static_cast<int>(i)));
}
)cc");
}
}},
},
R"cc(
{
$byte_size$;
if (byte_size > 0) {
target = stream->WriteEnumPacked($number$, _internal_$name$(),
byte_size, target);
}
}
)cc");
return;
}
p->Emit(R"cc(
Expand Down Expand Up @@ -445,8 +517,12 @@ void RepeatedEnum::GenerateByteSize(io::Printer* p) const {
total_size += ::_pbi::WireFormatLite::Int32Size(
static_cast<int32_t>(data_size));
}
$cached_size_$.Set(::_pbi::ToCachedSize(data_size));
)cc");
if (has_cached_size_) {
p->Emit(R"cc(
$cached_size_$.Set(::_pbi::ToCachedSize(data_size));
)cc");
}
}},
},
R"cc(
Expand Down
99 changes: 80 additions & 19 deletions src/google/protobuf/compiler/cpp/field_generators/message_field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,7 @@ class RepeatedMessage : public FieldGeneratorBase {
field_(field),
opts_(&opts),
weak_(IsImplicitWeakField(field, opts, scc)),
split_(ShouldSplit(field, opts)),
has_required_(scc->HasRequiredFields(field->message_type())) {}

~RepeatedMessage() override = default;
Expand All @@ -735,7 +736,7 @@ class RepeatedMessage : public FieldGeneratorBase {
void GenerateMergingCode(io::Printer* p) const override;
void GenerateSwappingCode(io::Printer* p) const override;
void GenerateConstructorCode(io::Printer* p) const override;
void GenerateCopyConstructorCode(io::Printer* p) const override {}
void GenerateCopyConstructorCode(io::Printer* p) const override;
void GenerateDestructorCode(io::Printer* p) const override;
void GenerateSerializeWithCachedSizesToArray(io::Printer* p) const override;
void GenerateByteSize(io::Printer* p) const override;
Expand All @@ -745,11 +746,18 @@ class RepeatedMessage : public FieldGeneratorBase {
const FieldDescriptor* field_;
const Options* opts_;
bool weak_;
bool split_;
bool has_required_;
};

void RepeatedMessage::GeneratePrivateMembers(io::Printer* p) const {
p->Emit("$pb$::$Weak$RepeatedPtrField< $Submsg$ > $name$_;\n");
if (split_) {
p->Emit(R"cc(
$pbi$::RawPtr<$pb$::$Weak$RepeatedPtrField<$Submsg$>> $name$_;
)cc");
} else {
p->Emit("$pb$::$Weak$RepeatedPtrField< $Submsg$ > $name$_;\n");
}
}

void RepeatedMessage::GenerateAccessorDeclarations(io::Printer* p) const {
Expand Down Expand Up @@ -831,18 +839,39 @@ void RepeatedMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const {
" return _internal_$name$();\n"
"}\n");

p->Emit(R"cc(
inline const $pb$::$Weak$RepeatedPtrField<$Submsg$>&
$Msg$::_internal$_weak$_$name$() const {
$TsanDetectConcurrentRead$;
return $field_$;
}
inline $pb$::$Weak$RepeatedPtrField<$Submsg$>*
$Msg$::_internal_mutable$_weak$_$name$() {
$TsanDetectConcurrentRead$;
return &$field_$;
}
)cc");
if (split_) {
p->Emit(R"cc(
inline const $pb$::$Weak$RepeatedPtrField<$Submsg$>&
$Msg$::_internal$_weak$_$name$() const {
$TsanDetectConcurrentRead$;
return *$field_$;
}
inline $pb$::$Weak$RepeatedPtrField<$Submsg$>*
$Msg$::_internal_mutable$_weak$_$name$() {
$TsanDetectConcurrentRead$;
$PrepareSplitMessageForWrite$;
if ($field_$.IsDefault()) {
$field_$.Set(
CreateMaybeMessage<$pb$::$Weak$RepeatedPtrField<$Submsg$>>(
GetArenaForAllocation()));
}
return $field_$.Get();
}
)cc");
} else {
p->Emit(R"cc(
inline const $pb$::$Weak$RepeatedPtrField<$Submsg$>&
$Msg$::_internal$_weak$_$name$() const {
$TsanDetectConcurrentRead$;
return $field_$;
}
inline $pb$::$Weak$RepeatedPtrField<$Submsg$>*
$Msg$::_internal_mutable$_weak$_$name$() {
$TsanDetectConcurrentRead$;
return &$field_$;
}
)cc");
}
if (weak_) {
p->Emit(R"cc(
inline const $pb$::RepeatedPtrField<$Submsg$>& $Msg$::_internal_$name$()
Expand All @@ -861,13 +890,27 @@ void RepeatedMessage::GenerateClearingCode(io::Printer* p) const {
}

void RepeatedMessage::GenerateMergingCode(io::Printer* p) const {
p->Emit(
"_this->_internal_mutable$_weak$_$name$()->MergeFrom(from._internal"
"$_weak$_$name$());\n");
// TODO(b/239716377): experiment with simplifying this to be
// `if (!from.empty()) { body(); }` for both split and non-split cases.
auto body = [&] {
p->Emit(R"cc(
_this->_internal_mutable$_weak$_$name$()->MergeFrom(
from._internal$_weak$_$name$());
)cc");
};
if (!split_) {
body();
} else {
p->Emit({{"body", body}}, R"cc(
if (!from.$field_$.IsDefault()) {
$body$;
}
)cc");
}
}

void RepeatedMessage::GenerateSwappingCode(io::Printer* p) const {
ABSL_CHECK(!ShouldSplit(descriptor_, options_));
ABSL_CHECK(!split_);
p->Emit(R"cc(
$field_$.InternalSwap(&other->$field_$);
)cc");
Expand All @@ -877,8 +920,26 @@ void RepeatedMessage::GenerateConstructorCode(io::Printer* p) const {
// Not needed for repeated fields.
}

void RepeatedMessage::GenerateCopyConstructorCode(io::Printer* p) const {
// TODO(b/291633281): For split repeated fields we might want to use type
// erasure to reduce binary size costs.
if (split_) {
p->Emit(R"cc(
if (!from._internal$_weak$_$name$().empty()) {
_internal_mutable$_weak$_$name$()->MergeFrom(from._internal$_weak$_$name$());
}
)cc");
}
}

void RepeatedMessage::GenerateDestructorCode(io::Printer* p) const {
p->Emit("$field_$.~$Weak$RepeatedPtrField();\n");
if (split_) {
p->Emit(R"cc(
$field_$.DeleteIfNotDefault();
)cc");
} else {
p->Emit("$field_$.~$Weak$RepeatedPtrField();\n");
}
}

void RepeatedMessage::GenerateSerializeWithCachedSizesToArray(
Expand Down
Loading

0 comments on commit 5b5e5bf

Please sign in to comment.