Skip to content

Commit

Permalink
Fix cord handling in DynamicMessage and oneofs.
Browse files Browse the repository at this point in the history
This fixes a memory corruption vulnerability for anyone using cord with dynamically built descriptor pools.

PiperOrigin-RevId: 676091224
  • Loading branch information
mkruskal-google authored and copybara-github committed Sep 18, 2024
1 parent aa5818d commit 9e8b30c
Show file tree
Hide file tree
Showing 34 changed files with 318 additions and 178 deletions.
2 changes: 2 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ build:ubsan --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1
# Workaround for the fact that Bazel links with $CC, not $CXX
# https://github.com/bazelbuild/bazel/issues/11122#issuecomment-613746748
build:ubsan --copt=-fno-sanitize=function --copt=-fno-sanitize=vptr
# Abseil passes nullptr to memcmp with 0 size
build:ubsan --copt=-fno-sanitize=nonnull-attribute

# TODO: migrate all dependencies from WORKSPACE to MODULE.bazel
# https://github.com/protocolbuffers/protobuf/issues/14313
Expand Down
2 changes: 2 additions & 0 deletions ci/common.bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ build:ubsan --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1
# Workaround for the fact that Bazel links with $CC, not $CXX
# https://github.com/bazelbuild/bazel/issues/11122#issuecomment-613746748
build:ubsan --copt=-fno-sanitize=function --copt=-fno-sanitize=vptr
# Abseil passes nullptr to memcmp with 0 size
build:ubsan --copt=-fno-sanitize=nonnull-attribute

# Workaround Bazel 7 remote cache issues.
# See https://github.com/bazelbuild/bazel/issues/20161
Expand Down
2 changes: 1 addition & 1 deletion python/google/protobuf/internal/descriptor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,7 @@ def CheckDescriptorMapping(self, mapping):
excepted_dict['new_key'] = 'new'
self.assertNotEqual(mapping, excepted_dict)
self.assertRaises(KeyError, mapping.__getitem__, 'key_error')
self.assertRaises(KeyError, mapping.__getitem__, len(mapping) + 1)
self.assertRaises(KeyError, mapping.__getitem__, len(mapping) * 2)
# TODO: Add __repr__ support for DescriptorMapping.
if api_implementation.Type() == 'cpp':
self.assertEqual(str(mapping)[0], '<')
Expand Down
2 changes: 1 addition & 1 deletion python/google/protobuf/internal/field_mask_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def testDescriptorToFieldMask(self):
mask = field_mask_pb2.FieldMask()
msg_descriptor = unittest_pb2.TestAllTypes.DESCRIPTOR
mask.AllFieldsFromDescriptor(msg_descriptor)
self.assertEqual(79, len(mask.paths))
self.assertEqual(80, len(mask.paths))
self.assertTrue(mask.IsValidForDescriptor(msg_descriptor))
for field in msg_descriptor.fields:
self.assertTrue(field.name in mask.paths)
Expand Down
3 changes: 3 additions & 0 deletions python/google/protobuf/internal/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def SetAllNonLazyFields(message):

message.optional_string_piece = u'124'
message.optional_cord = u'125'
message.optional_bytes_cord = b'optional bytes cord'

#
# Repeated fields.
Expand Down Expand Up @@ -247,6 +248,7 @@ def SetAllExtensions(message):

extensions[pb2.optional_string_piece_extension] = u'124'
extensions[pb2.optional_cord_extension] = u'125'
extensions[pb2.optional_bytes_cord_extension] = b'optional bytes cord'

#
# Repeated fields.
Expand Down Expand Up @@ -423,6 +425,7 @@ def ExpectAllFieldsSet(test_case, message):

test_case.assertTrue(message.HasField('optional_string_piece'))
test_case.assertTrue(message.HasField('optional_cord'))
test_case.assertTrue(message.HasField('optional_bytes_cord'))

test_case.assertEqual(101, message.optional_int32)
test_case.assertEqual(102, message.optional_int64)
Expand Down
6 changes: 3 additions & 3 deletions python/google/protobuf/internal/unknown_fields_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def testCheckUnknownFieldValue(self):
unknown_field_set,
(17, 0, 117))

self.assertEqual(98, len(unknown_field_set))
self.assertEqual(99, len(unknown_field_set))

def testCopyFrom(self):
message = unittest_pb2.TestEmptyMessage()
Expand Down Expand Up @@ -250,7 +250,7 @@ def testClear(self):
self.empty_message.Clear()
# All cleared, even unknown fields.
self.assertEqual(self.empty_message.SerializeToString(), b'')
self.assertEqual(len(unknown_field_set), 98)
self.assertEqual(len(unknown_field_set), 99)

@unittest.skipIf((sys.version_info.major, sys.version_info.minor) < (3, 4),
'tracemalloc requires python 3.4+')
Expand Down Expand Up @@ -309,7 +309,7 @@ def testUnknownField(self):
def testUnknownExtensions(self):
message = unittest_pb2.TestEmptyMessageWithExtensions()
message.ParseFromString(self.all_fields_data)
self.assertEqual(len(unknown_fields.UnknownFieldSet(message)), 98)
self.assertEqual(len(unknown_fields.UnknownFieldSet(message)), 99)
self.assertEqual(message.SerializeToString(), self.all_fields_data)


Expand Down
43 changes: 4 additions & 39 deletions src/google/protobuf/compiler/cpp/field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ FieldGeneratorBase::FieldGeneratorBase(const FieldDescriptor* field,
break;
case FieldDescriptor::CPPTYPE_STRING:
is_string_ = true;
string_type_ = field->options().ctype();
is_inlined_ = IsStringInlined(field, options);
is_bytes_ = field->type() == FieldDescriptor::TYPE_BYTES;
has_default_constexpr_constructor_ = is_repeated_or_map;
Expand Down Expand Up @@ -229,40 +228,6 @@ void FieldGeneratorBase::GenerateCopyConstructorCode(io::Printer* p) const {
}

namespace {
// Use internal types instead of ctype or string_type.
enum class StringType {
kView,
kString,
kCord,
kStringPiece,
};

StringType GetStringType(const FieldDescriptor& field) {
ABSL_CHECK_EQ(field.cpp_type(), FieldDescriptor::CPPTYPE_STRING);

if (field.options().has_ctype()) {
switch (field.options().ctype()) {
case FieldOptions::CORD:
return StringType::kCord;
case FieldOptions::STRING_PIECE:
return StringType::kStringPiece;
default:
return StringType::kString;
}
}

const pb::CppFeatures& cpp_features =
CppGenerator::GetResolvedSourceFeatures(field).GetExtension(::pb::cpp);
switch (cpp_features.string_type()) {
case pb::CppFeatures::CORD:
return StringType::kCord;
case pb::CppFeatures::VIEW:
return StringType::kView;
default:
return StringType::kString;
}
}

std::unique_ptr<FieldGeneratorBase> MakeGenerator(const FieldDescriptor* field,
const Options& options,
MessageSCCAnalyzer* scc) {
Expand All @@ -279,7 +244,7 @@ std::unique_ptr<FieldGeneratorBase> MakeGenerator(const FieldDescriptor* field,
case FieldDescriptor::CPPTYPE_MESSAGE:
return MakeRepeatedMessageGenerator(field, options, scc);
case FieldDescriptor::CPPTYPE_STRING: {
if (GetStringType(*field) == StringType::kView) {
if (field->cpp_string_type() == FieldDescriptor::CppStringType::kView) {
return MakeRepeatedStringViewGenerator(field, options, scc);
} else {
return MakeRepeatedStringGenerator(field, options, scc);
Expand All @@ -303,10 +268,10 @@ std::unique_ptr<FieldGeneratorBase> MakeGenerator(const FieldDescriptor* field,
case FieldDescriptor::CPPTYPE_ENUM:
return MakeSinguarEnumGenerator(field, options, scc);
case FieldDescriptor::CPPTYPE_STRING: {
switch (GetStringType(*field)) {
case StringType::kView:
switch (field->cpp_string_type()) {
case FieldDescriptor::CppStringType::kView:
return MakeSingularStringViewGenerator(field, options, scc);
case StringType::kCord:
case FieldDescriptor::CppStringType::kCord:
if (field->type() == FieldDescriptor::TYPE_BYTES) {
if (field->real_containing_oneof()) {
return MakeOneofCordGenerator(field, options, scc);
Expand Down
5 changes: 0 additions & 5 deletions src/google/protobuf/compiler/cpp/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,6 @@ class FieldGeneratorBase {
// Returns true if the field API uses bytes (void) instead of chars.
bool is_bytes() const { return is_bytes_; }

// Returns the public API string type for string fields.
FieldOptions::CType string_type() const { return string_type_; }

// Returns true if this field is part of a oneof field.
bool is_oneof() const { return is_oneof_; }

Expand Down Expand Up @@ -217,7 +214,6 @@ class FieldGeneratorBase {
bool is_lazy_ = false;
bool is_weak_ = false;
bool is_oneof_ = false;
FieldOptions::CType string_type_ = FieldOptions::STRING;
bool has_default_constexpr_constructor_ = false;
};

Expand Down Expand Up @@ -269,7 +265,6 @@ class FieldGenerator {
bool is_foreign() const { return impl_->is_foreign(); }
bool is_string() const { return impl_->is_string(); }
bool is_bytes() const { return impl_->is_bytes(); }
FieldOptions::CType string_type() const { return impl_->string_type(); }
bool is_oneof() const { return impl_->is_oneof(); }
bool is_inlined() const { return impl_->is_inlined(); }
bool has_default_constexpr_constructor() const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,6 @@ void SingularStringView::GenerateStaticMembers(io::Printer* p) const {
}

void SingularStringView::GenerateAccessorDeclarations(io::Printer* p) const {
ABSL_CHECK(!field_->options().has_ctype());

auto v1 = p->WithVars(AnnotatedAccessors(field_, {""}));
auto v2 = p->WithVars(
AnnotatedAccessors(field_, {"set_"}, AnnotationCollector::kSet));
Expand Down
3 changes: 2 additions & 1 deletion src/google/protobuf/compiler/cpp/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1597,7 +1597,8 @@ MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
switch (field->type()) {
case FieldDescriptor::TYPE_STRING:
case FieldDescriptor::TYPE_BYTES: {
if (field->options().ctype() == FieldOptions::CORD) {
if (field->cpp_string_type() ==
FieldDescriptor::CppStringType::kCord) {
result.contains_cord = true;
}
break;
Expand Down
10 changes: 6 additions & 4 deletions src/google/protobuf/compiler/cpp/tracker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ Getters RepeatedFieldGetters(const FieldDescriptor* field,

Getters StringFieldGetters(const FieldDescriptor* field, const Options& opts) {
std::string member = FieldMemberName(field, ShouldSplit(field, opts));
bool is_std_string = field->options().ctype() == FieldOptions::STRING;
bool is_std_string =
field->cpp_string_type() == FieldDescriptor::CppStringType::kString;

Getters getters;
if (is_std_string && !field->default_value_string().empty()) {
Expand All @@ -241,7 +242,8 @@ Getters StringOneofGetters(const FieldDescriptor* field,
ABSL_CHECK(oneof != nullptr);

std::string member = FieldMemberName(field, ShouldSplit(field, opts));
bool is_std_string = field->options().ctype() == FieldOptions::STRING;
bool is_std_string =
field->cpp_string_type() == FieldDescriptor::CppStringType::kString;

std::string field_ptr = member;
if (is_std_string) {
Expand All @@ -258,8 +260,8 @@ Getters StringOneofGetters(const FieldDescriptor* field,
}

Getters getters;
if (field->default_value_string().empty() ||
field->options().ctype() == FieldOptions::STRING_PIECE) {
if (field->default_value_string().empty()
) {
getters.base = absl::Substitute("$0 ? $1 : nullptr", has, field_ptr);
} else {
getters.base =
Expand Down
4 changes: 4 additions & 0 deletions src/google/protobuf/compiler/cpp/unittest.inc
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ TEST(GENERATED_MESSAGE_TEST_NAME, SwapWithOther) {
message1.set_optional_string("abc");
message1.mutable_optional_nested_message()->set_bb(1);
message1.set_optional_nested_enum(UNITTEST::TestAllTypes::FOO);
message1.set_optional_bytes_cord("bytes cord");
message1.add_repeated_int32(1);
message1.add_repeated_int32(2);
message1.add_repeated_string("a");
Expand All @@ -441,6 +442,7 @@ TEST(GENERATED_MESSAGE_TEST_NAME, SwapWithOther) {
message2.set_optional_string("def");
message2.mutable_optional_nested_message()->set_bb(2);
message2.set_optional_nested_enum(UNITTEST::TestAllTypes::BAR);
message2.set_optional_bytes_cord("bytes cord");
message2.add_repeated_int32(3);
message2.add_repeated_string("c");
message2.add_repeated_nested_message()->set_bb(9);
Expand All @@ -452,6 +454,7 @@ TEST(GENERATED_MESSAGE_TEST_NAME, SwapWithOther) {
EXPECT_EQ("def", message1.optional_string());
EXPECT_EQ(2, message1.optional_nested_message().bb());
EXPECT_EQ(UNITTEST::TestAllTypes::BAR, message1.optional_nested_enum());
EXPECT_EQ(absl::Cord("bytes cord"), message1.optional_bytes_cord());
ASSERT_EQ(1, message1.repeated_int32_size());
EXPECT_EQ(3, message1.repeated_int32(0));
ASSERT_EQ(1, message1.repeated_string_size());
Expand All @@ -465,6 +468,7 @@ TEST(GENERATED_MESSAGE_TEST_NAME, SwapWithOther) {
EXPECT_EQ("abc", message2.optional_string());
EXPECT_EQ(1, message2.optional_nested_message().bb());
EXPECT_EQ(UNITTEST::TestAllTypes::FOO, message2.optional_nested_enum());
EXPECT_EQ(absl::Cord("bytes cord"), message2.optional_bytes_cord());
ASSERT_EQ(2, message2.repeated_int32_size());
EXPECT_EQ(1, message2.repeated_int32(0));
EXPECT_EQ(2, message2.repeated_int32(1));
Expand Down
3 changes: 2 additions & 1 deletion src/google/protobuf/descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -2954,7 +2954,8 @@ PROTOBUF_EXPORT bool HasHasbit(const FieldDescriptor* field);
template <typename FieldDesc = FieldDescriptor,
typename FieldOpts = FieldOptions>
typename FieldOpts::CType EffectiveStringCType(const FieldDesc* field) {
// TODO Replace this function with FieldDescriptor::string_type;
// TODO Replace this function with
// FieldDescriptor::cpp_string_type;
switch (field->cpp_string_type()) {
case FieldDescriptor::CppStringType::kCord:
return FieldOpts::CORD;
Expand Down
4 changes: 3 additions & 1 deletion src/google/protobuf/descriptor_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7571,7 +7571,7 @@ TEST_F(FeaturesTest, Proto2Features) {
name: "cord"
number: 8
label: LABEL_OPTIONAL
type: TYPE_STRING
type: TYPE_BYTES
options { ctype: CORD }
}
field {
Expand Down Expand Up @@ -7656,6 +7656,8 @@ TEST_F(FeaturesTest, Proto2Features) {

EXPECT_EQ(message->FindFieldByName("str")->cpp_string_type(),
FieldDescriptor::CppStringType::kString);
EXPECT_EQ(message->FindFieldByName("cord")->cpp_string_type(),
FieldDescriptor::CppStringType::kCord);

// Check round-trip consistency.
FileDescriptorProto proto;
Expand Down
Loading

0 comments on commit 9e8b30c

Please sign in to comment.