From 72b0b7afbabce7494c2605aa240b0f0ca98aa6e5 Mon Sep 17 00:00:00 2001 From: Mike Kruskal Date: Thu, 18 Jul 2024 10:41:30 -0700 Subject: [PATCH] Introduce FieldDescriptor::cpp_string_type() API to replace direct ctype inspection which will be removed in the next breaking change This should provide the roughly same result as ctype, except that it reflects actual behavior rather than the specification in the proto file. VIEW will now be visible, and some subtleties around CORD and PIECE will change. PiperOrigin-RevId: 653677655 --- src/google/protobuf/descriptor.cc | 23 +++++++ src/google/protobuf/descriptor.h | 19 +++--- src/google/protobuf/descriptor_lite.h | 11 ++++ src/google/protobuf/descriptor_unittest.cc | 70 ++++++++++++++++++++++ 4 files changed, 115 insertions(+), 8 deletions(-) diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index fda5dc4d7a45..42f8e26fdc53 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -3941,6 +3941,29 @@ bool FieldDescriptor::has_optional_keyword() const { is_optional() && !containing_oneof()); } +FieldDescriptor::CppStringType FieldDescriptor::cpp_string_type() const { + ABSL_DCHECK(cpp_type() == FieldDescriptor::CPPTYPE_STRING); + switch (features().GetExtension(pb::cpp).string_type()) { + case pb::CppFeatures::VIEW: + return CppStringType::kView; + case pb::CppFeatures::CORD: + // In open-source, protobuf CORD is only supported for singular bytes + // fields. + if (type() != FieldDescriptor::TYPE_BYTES || is_repeated() || + is_extension()) { + return CppStringType::kString; + } + return CppStringType::kCord; + case pb::CppFeatures::STRING: + return CppStringType::kString; + default: + // If features haven't been resolved, this is a dynamic build not for C++ + // codegen. Just use string type. + ABSL_DCHECK(!features().GetExtension(pb::cpp).has_string_type()); + return CppStringType::kString; + } +} + // Location methods =============================================== bool FileDescriptor::GetSourceLocation(const std::vector& path, diff --git a/src/google/protobuf/descriptor.h b/src/google/protobuf/descriptor.h index 2a1dfb59de51..a38bb9cb3115 100644 --- a/src/google/protobuf/descriptor.h +++ b/src/google/protobuf/descriptor.h @@ -859,6 +859,10 @@ class PROTOBUF_EXPORT FieldDescriptor : private internal::SymbolBase, const char* cpp_type_name() const; // Name of the C++ type. Label label() const; // optional/required/repeated +#ifndef SWIG + CppStringType cpp_string_type() const; // The C++ string type of this field. +#endif + bool is_required() const; // shorthand for label() == LABEL_REQUIRED bool is_optional() const; // shorthand for label() == LABEL_OPTIONAL bool is_repeated() const; // shorthand for label() == LABEL_REPEATED @@ -2892,22 +2896,21 @@ PROTOBUF_EXPORT bool HasPreservingUnknownEnumSemantics( PROTOBUF_EXPORT bool HasHasbit(const FieldDescriptor* field); +#ifndef SWIG // For a string field, returns the effective ctype. If the actual ctype is // not supported, returns the default of STRING. template typename FieldOpts::CType EffectiveStringCType(const FieldDesc* field) { - ABSL_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING); - // Open-source protobuf release only supports STRING ctype and CORD for - // sinuglar bytes. - if (field->type() == FieldDescriptor::TYPE_BYTES && !field->is_repeated() && - field->options().ctype() == FieldOpts::CORD && !field->is_extension()) { - return FieldOpts::CORD; + // TODO Replace this function with FieldDescriptor::string_type; + switch (field->cpp_string_type()) { + case FieldDescriptor::CppStringType::kCord: + return FieldOpts::CORD; + default: + return FieldOpts::STRING; } - return FieldOpts::STRING; } -#ifndef SWIG enum class Utf8CheckMode : uint8_t { kStrict = 0, // Parsing will fail if non UTF-8 data is in string fields. kVerify = 1, // Only log an error but parsing will succeed. diff --git a/src/google/protobuf/descriptor_lite.h b/src/google/protobuf/descriptor_lite.h index db5805affee3..e1a90b7fa60d 100644 --- a/src/google/protobuf/descriptor_lite.h +++ b/src/google/protobuf/descriptor_lite.h @@ -78,6 +78,17 @@ class FieldDescriptorLite { MAX_LABEL = 3, // Constant useful for defining lookup tables // indexed by Label. }; + + // Identifies the storage type of a C++ string field. This corresponds to + // pb.CppFeatures.StringType, but is compatible with ctype prior to Edition + // 2024. 0 is reserved for errors. +#ifndef SWIG + enum class CppStringType { + kView = 1, + kCord = 2, + kString = 3, + }; +#endif }; } // namespace internal diff --git a/src/google/protobuf/descriptor_unittest.cc b/src/google/protobuf/descriptor_unittest.cc index 3529aca8f900..ac4e9fd43da9 100644 --- a/src/google/protobuf/descriptor_unittest.cc +++ b/src/google/protobuf/descriptor_unittest.cc @@ -7593,6 +7593,9 @@ TEST_F(FeaturesTest, Proto2Features) { EXPECT_TRUE(message->FindFieldByName("req")->is_required()); EXPECT_TRUE(file->enum_type(0)->is_closed()); + EXPECT_EQ(message->FindFieldByName("str")->cpp_string_type(), + FieldDescriptor::CppStringType::kString); + // Check round-trip consistency. FileDescriptorProto proto; file->CopyTo(&proto); @@ -9709,6 +9712,73 @@ TEST_F(FeaturesTest, EnumFeatureHelpers) { EXPECT_FALSE(HasPreservingUnknownEnumSemantics(field_legacy_closed)); } +TEST_F(FeaturesTest, FieldCppStringType) { + BuildDescriptorMessagesInTestPool(); + const std::string file_contents = absl::Substitute( + R"pb( + name: "foo.proto" + syntax: "editions" + edition: EDITION_2024 + message_type { + name: "Foo" + field { + name: "view" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "str" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { + features { + [pb.cpp] { string_type: STRING } + } + } + } + field { + name: "cord" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { + features { + [pb.cpp] { string_type: CORD } + } + } + } + field { + name: "cord_bytes" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + options { + features { + [pb.cpp] { string_type: CORD } + } + } + } $0 + } + )pb", + "" + ); + const FileDescriptor* file = BuildFile(file_contents); + const Descriptor* message = file->message_type(0); + const FieldDescriptor* view = message->field(0); + const FieldDescriptor* str = message->field(1); + const FieldDescriptor* cord = message->field(2); + const FieldDescriptor* cord_bytes = message->field(3); + + EXPECT_EQ(view->cpp_string_type(), FieldDescriptor::CppStringType::kView); + EXPECT_EQ(str->cpp_string_type(), FieldDescriptor::CppStringType::kString); + EXPECT_EQ(cord_bytes->cpp_string_type(), + FieldDescriptor::CppStringType::kCord); + EXPECT_EQ(cord->cpp_string_type(), FieldDescriptor::CppStringType::kString); + +} + TEST_F(FeaturesTest, MergeFeatureValidationFailed) { BuildDescriptorMessagesInTestPool(); BuildFileInTestPool(pb::TestFeatures::descriptor()->file());