From 0d1ec21a808dbaf54bff4201cbd2173d29924353 Mon Sep 17 00:00:00 2001 From: Ufuk Kayserilioglu Date: Fri, 23 Jun 2023 00:34:24 +0300 Subject: [PATCH 1/3] Add failing tests --- spec/tapioca/dsl/compilers/protobuf_spec.rb | 114 ++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/spec/tapioca/dsl/compilers/protobuf_spec.rb b/spec/tapioca/dsl/compilers/protobuf_spec.rb index a68ad9e16..aafaf8964 100644 --- a/spec/tapioca/dsl/compilers/protobuf_spec.rb +++ b/spec/tapioca/dsl/compilers/protobuf_spec.rb @@ -543,6 +543,120 @@ def fields=(value); end RBI assert_equal(expected, rbi_for("Google::Protobuf::Struct")) end + + it "handles map and repeating types with anonymous subtype message classes by mapping them to T.untyped" do + add_ruby_file("protobuf.rb", <<~RUBY) + Google::Protobuf::DescriptorPool.generated_pool.build do + add_file("cart.proto", :syntax => :proto3) do + add_message "CartEntry" do + optional :key, :string, 1 + optional :value, :string, 2 + end + add_message "NonMapEntry" do + optional :key, :string, 1 + optional :value, :string, 2 + end + add_message "MyCart" do + map :tables, :string, :message, 1, "CartEntry" + repeated :items, :message, 2, "NonMapEntry" + end + end + end + + Cart = Google::Protobuf::DescriptorPool.generated_pool.lookup("MyCart").msgclass + # we intentionally don't define `CartEntry` or `NonMapEntry` as constants, so that + # those message classes stay anonymous, which we expect to be mapped to `T.untyped` + RUBY + + expected = <<~RBI + # typed: strong + + class Cart + sig { params(items: T.nilable(T.any(Google::Protobuf::RepeatedField[T.untyped], T::Array[T.untyped])), tables: T.nilable(T.any(Google::Protobuf::Map[String, T.untyped], T::Hash[String, T.untyped]))).void } + def initialize(items: Google::Protobuf::RepeatedField.new(:message, T.untyped), tables: Google::Protobuf::Map.new(:string, :message, T.untyped)); end + + sig { void } + def clear_items; end + + sig { void } + def clear_tables; end + + sig { returns(Google::Protobuf::RepeatedField[T.untyped]) } + def items; end + + sig { params(value: Google::Protobuf::RepeatedField[T.untyped]).void } + def items=(value); end + + sig { returns(Google::Protobuf::Map[String, T.untyped]) } + def tables; end + + sig { params(value: Google::Protobuf::Map[String, T.untyped]).void } + def tables=(value); end + end + RBI + + assert_equal(expected, rbi_for(:Cart)) + end + + it "handles map types regardless of their name" do + # This is test is based on this definition from `google-cloud-bigtable` gem that was causing issues: + # https://github.com/googleapis/google-cloud-ruby/blob/9de1ce5bf74105383fc46060600d5293f8692035/google-cloud-bigtable-admin-v2/lib/google/bigtable/admin/v2/bigtable_instance_admin_pb.rb#L20 + add_ruby_file("protobuf.rb", <<~RUBY) + require "base64" + require 'google/protobuf' + + # This is how the new protobuf compiler (protoc) generates `xx_pb.rb` files. + # It embeds the descriptor data as binary into a string and parses it into the pool. + # The following is a simplified result of running `protoc --ruby_out=. cart.proto` with `cart.proto` as: + # ``` + # syntax = "proto3"; + # + # message MyCart { + # message Progress { + # enum State { + # STATE_UNSPECIFIED = 0; + # PENDING = 1; + # COMPLETED = 2; + # } + # + # State state = 4; + # } + # + # map progress = 4; + # } + # ``` + # which is based on the failing case from https://github.com/googleapis/googleapis/blob/master/google/bigtable/admin/v2/bigtable_instance_admin.proto#L486-L536 + # + # I encoded the data as Base64 since embedding the binary string was giving me invalid byte sequence errors. + descriptor_data = Base64.decode64("CgpjYXJ0LnByb3RvIuMBCgZNeUNhcnQSJwoIcHJvZ3Jlc3MYBCADKAsyFS5NeUNhcnQuUHJvZ3Jlc3NFbnRyeRptCghQcm9ncmVzcxIlCgVzdGF0ZRgEIAEoDjIWLk15Q2FydC5Qcm9ncmVzcy5TdGF0ZSI6CgVTdGF0ZRIVChFTVEFURV9VTlNQRUNJRklFRBAAEgsKB1BFTkRJTkcQARINCglDT01QTEVURUQQAhpBCg1Qcm9ncmVzc0VudHJ5EgsKA2tleRgBIAEoCRIfCgV2YWx1ZRgCIAEoCzIQLk15Q2FydC5Qcm9ncmVzczoCOAFiBnByb3RvMw==") + pool = Google::Protobuf::DescriptorPool.generated_pool + pool.add_serialized_file(descriptor_data) + + Cart = Google::Protobuf::DescriptorPool.generated_pool.lookup("MyCart").msgclass + Cart::Progress = Google::Protobuf::DescriptorPool.generated_pool.lookup("MyCart.Progress").msgclass + Cart::Progress::State = Google::Protobuf::DescriptorPool.generated_pool.lookup("MyCart.Progress.State").enummodule + RUBY + + expected = <<~RBI + # typed: strong + + class Cart + sig { params(progress: T.nilable(T.any(Google::Protobuf::Map[String, Cart::Progress], T::Hash[String, Cart::Progress]))).void } + def initialize(progress: Google::Protobuf::Map.new(:string, :message, Cart::Progress)); end + + sig { void } + def clear_progress; end + + sig { returns(Google::Protobuf::Map[String, Cart::Progress]) } + def progress; end + + sig { params(value: Google::Protobuf::Map[String, Cart::Progress]).void } + def progress=(value); end + end + RBI + + assert_equal(expected, rbi_for(:Cart)) + end end end end From 0dabeda75f81f5fb0ec054c4d824523e5ee75708 Mon Sep 17 00:00:00 2001 From: Ufuk Kayserilioglu Date: Fri, 23 Jun 2023 00:35:53 +0300 Subject: [PATCH 2/3] If a message class does not have a name, map it to `T.untyped` If the `msgclass` of a subtype is not assigned to a constant, then its `#name` method would return `nil` and the compiler would return it as the type name of the descriptor. Not only does this break the contract of the `type_of` method defined by its signature which promises to always return a `String` instance, but it also leads to us generating broken RBI files, since these `nil` values serialize to empty strings. This commit fixes this by mapping the message class to `T.untyped` if it does not have a name. --- lib/tapioca/dsl/compilers/protobuf.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tapioca/dsl/compilers/protobuf.rb b/lib/tapioca/dsl/compilers/protobuf.rb index 6363b0221..d93267936 100644 --- a/lib/tapioca/dsl/compilers/protobuf.rb +++ b/lib/tapioca/dsl/compilers/protobuf.rb @@ -206,7 +206,7 @@ def type_of(descriptor) # > field, even if it was not defined in the enum. "T.any(Symbol, Integer)" when :message - descriptor.subtype.msgclass.name + descriptor.subtype.msgclass.name || "T.untyped" when :int32, :int64, :uint32, :uint64 "Integer" when :double, :float From 011b826aa2e0ced6e1e02fc037e5d17dbfda9c41 Mon Sep 17 00:00:00 2001 From: Ufuk Kayserilioglu Date: Fri, 23 Jun 2023 00:50:07 +0300 Subject: [PATCH 3/3] Detect map types by trying to assign a hash value for the field The realization is that you can't assign an array type to `Map` fields and that you can't assign a hash type to `Repeated` fields. So we can use that to detect the type of the field. We try to instantiate a new instance of the constant type, setting the value of the current descriptor field to an empty hash. If the type is `Map`, then it will succeed. If it's `Repeated`, it will fail. --- lib/tapioca/dsl/compilers/protobuf.rb | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/tapioca/dsl/compilers/protobuf.rb b/lib/tapioca/dsl/compilers/protobuf.rb index d93267936..ce5acd9d4 100644 --- a/lib/tapioca/dsl/compilers/protobuf.rb +++ b/lib/tapioca/dsl/compilers/protobuf.rb @@ -78,7 +78,7 @@ class Field < T::Struct extend T::Sig - ConstantType = type_member { { fixed: Module } } + ConstantType = type_member { { fixed: T::Class[T.anything] } } FIELD_RE = /^[a-z_][a-zA-Z0-9_]*$/ @@ -225,14 +225,24 @@ def nilable_descriptor?(descriptor) descriptor.label == :optional && descriptor.type == :message end + sig { params(descriptor: Google::Protobuf::FieldDescriptor).returns(T::Boolean) } + def map_type?(descriptor) + # Defensively make sure that we are dealing with a repeated field + return false unless descriptor.label == :repeated + + # Try to create a new instance with the field that maps to the descriptor name + # being assinged a hash value. If this goes through, then it's a map type. + constant.new(**{ descriptor.name => {} }) + true + rescue ArgumentError + # This means the descriptor is not a map type + false + end + sig { params(descriptor: Google::Protobuf::FieldDescriptor).returns(Field) } def field_of(descriptor) if descriptor.label == :repeated - # Here we're going to check if the submsg_name is named according to - # how Google names map entries. - # https://github.com/protocolbuffers/protobuf/blob/f82e26/ruby/ext/google/protobuf_c/defs.c#L1963-L1966 - if descriptor.submsg_name.to_s.end_with?("_MapEntry_#{descriptor.name}") || - descriptor.submsg_name.to_s.end_with?("FieldsEntry") + if map_type?(descriptor) key = descriptor.subtype.lookup("key") value = descriptor.subtype.lookup("value")