From 4ee55d7e31a939a824fdd91188204e73e729c344 Mon Sep 17 00:00:00 2001 From: Jie Luo Date: Thu, 31 Jul 2025 22:43:20 -0700 Subject: [PATCH] Raise warnings when assign bool to int/enum field in Python Proto. This will turn into error in 34.0 release. PiperOrigin-RevId: 789619218 --- python/convert.c | 51 +++++--- .../google/protobuf/internal/message_test.py | 109 ++++++++++++++++++ .../google/protobuf/internal/type_checkers.py | 37 +++++- python/google/protobuf/pyext/map_container.cc | 9 ++ python/google/protobuf/pyext/message.cc | 18 +++ python/google/protobuf/pyext/message.h | 3 + .../pyext/repeated_scalar_container.cc | 10 ++ 7 files changed, 220 insertions(+), 17 deletions(-) diff --git a/python/convert.c b/python/convert.c index 32bca23fd52fd..4aeb6548f78e3 100644 --- a/python/convert.c +++ b/python/convert.c @@ -6,7 +6,6 @@ // https://developers.google.com/open-source/licenses/bsd #include "python/convert.h" - #include "python/message.h" #include "python/protobuf.h" #include "upb/message/compare.h" @@ -62,7 +61,22 @@ PyObject* PyUpb_UpbToPy(upb_MessageValue val, const upb_FieldDef* f, } } -static bool PyUpb_GetInt64(PyObject* obj, int64_t* val) { +// TODO: raise error in 2026 Q1 release +static void WarnBool(const upb_FieldDef* f) { + static int bool_warning_count = 100; + if (bool_warning_count > 0) { + --bool_warning_count; + PyErr_WarnFormat(PyExc_DeprecationWarning, 3, + "Field %s: Expected an int, got a boolean. This " + "will be rejected in 7.34.0, please fix it before that", + upb_FieldDef_FullName(f)); + } +} + +static bool PyUpb_GetInt64(PyObject* obj, const upb_FieldDef* f, int64_t* val) { + if (PyBool_Check(obj)) { + WarnBool(f); + } // We require that the value is either an integer or has an __index__ // conversion. obj = PyNumber_Index(obj); @@ -81,7 +95,11 @@ static bool PyUpb_GetInt64(PyObject* obj, int64_t* val) { return ok; } -static bool PyUpb_GetUint64(PyObject* obj, uint64_t* val) { +static bool PyUpb_GetUint64(PyObject* obj, const upb_FieldDef* f, + uint64_t* val) { + if (PyBool_Check(obj)) { + WarnBool(f); + } // We require that the value is either an integer or has an __index__ // conversion. obj = PyNumber_Index(obj); @@ -98,9 +116,9 @@ static bool PyUpb_GetUint64(PyObject* obj, uint64_t* val) { return ok; } -static bool PyUpb_GetInt32(PyObject* obj, int32_t* val) { +static bool PyUpb_GetInt32(PyObject* obj, const upb_FieldDef* f, int32_t* val) { int64_t i64; - if (!PyUpb_GetInt64(obj, &i64)) return false; + if (!PyUpb_GetInt64(obj, f, &i64)) return false; if (i64 < INT32_MIN || i64 > INT32_MAX) { PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj); return false; @@ -109,9 +127,10 @@ static bool PyUpb_GetInt32(PyObject* obj, int32_t* val) { return true; } -static bool PyUpb_GetUint32(PyObject* obj, uint32_t* val) { +static bool PyUpb_GetUint32(PyObject* obj, const upb_FieldDef* f, + uint32_t* val) { uint64_t u64; - if (!PyUpb_GetUint64(obj, &u64)) return false; + if (!PyUpb_GetUint64(obj, f, &u64)) return false; if (u64 > UINT32_MAX) { PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj); return false; @@ -164,8 +183,9 @@ const char* upb_FieldDef_TypeString(const upb_FieldDef* f) { UPB_UNREACHABLE(); } -static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e, +static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val) { + const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f); if (PyUnicode_Check(obj)) { Py_ssize_t size; const char* name = PyUnicode_AsUTF8AndSize(obj, &size); @@ -178,8 +198,11 @@ static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e, val->int32_val = upb_EnumValueDef_Number(ev); return true; } else { + if (PyBool_Check(obj)) { + WarnBool(f); + } int32_t i32; - if (!PyUpb_GetInt32(obj, &i32)) return false; + if (!PyUpb_GetInt32(obj, f, &i32)) return false; if (upb_EnumDef_IsClosed(e) && !upb_EnumDef_CheckNumber(e, i32)) { PyErr_Format(PyExc_ValueError, "invalid enumerator %d", (int)i32); return false; @@ -238,15 +261,15 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val, upb_Arena* arena) { switch (upb_FieldDef_CType(f)) { case kUpb_CType_Enum: - return PyUpb_PyToUpbEnum(obj, upb_FieldDef_EnumSubDef(f), val); + return PyUpb_PyToUpbEnum(obj, f, val); case kUpb_CType_Int32: - return PyUpb_GetInt32(obj, &val->int32_val); + return PyUpb_GetInt32(obj, f, &val->int32_val); case kUpb_CType_Int64: - return PyUpb_GetInt64(obj, &val->int64_val); + return PyUpb_GetInt64(obj, f, &val->int64_val); case kUpb_CType_UInt32: - return PyUpb_GetUint32(obj, &val->uint32_val); + return PyUpb_GetUint32(obj, f, &val->uint32_val); case kUpb_CType_UInt64: - return PyUpb_GetUint64(obj, &val->uint64_val); + return PyUpb_GetUint64(obj, f, &val->uint64_val); case kUpb_CType_Float: if (!PyFloat_Check(obj) && PyUpb_IsNumpyNdarray(obj, f)) return false; val->float_val = PyFloat_AsDouble(obj); diff --git a/python/google/protobuf/internal/message_test.py b/python/google/protobuf/internal/message_test.py index ea0c707009a14..6f4f809f9552d 100755 --- a/python/google/protobuf/internal/message_test.py +++ b/python/google/protobuf/internal/message_test.py @@ -1431,6 +1431,115 @@ def testMessageClassName(self, message_module): 'TestAllTypes.NestedMessage', nested.__class__.__qualname__ ) + def testAssignBoolToEnum(self, message_module): + # TODO: change warning into error in 2026 Q1 + # with self.assertRaises(TypeError): + with warnings.catch_warnings(record=True) as w: + m = message_module.TestAllTypes(optional_nested_enum=True) + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.optional_nested_enum, 1) + + m = message_module.TestAllTypes(optional_nested_enum=2) + with warnings.catch_warnings(record=True) as w: + m.optional_nested_enum = True + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.optional_nested_enum, 1) + + with warnings.catch_warnings(record=True) as w: + m.optional_nested_enum = 2 + self.assertFalse(w) + self.assertEqual(m.optional_nested_enum, 2) + + def testBoolToRepeatedEnum(self, message_module): + with warnings.catch_warnings(record=True) as w: + m = message_module.TestAllTypes(repeated_nested_enum=[True]) + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.repeated_nested_enum, [1]) + + m = message_module.TestAllTypes() + with warnings.catch_warnings(record=True) as w: + m.repeated_nested_enum.append(True) + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.repeated_nested_enum, [1]) + + def testBoolToOneofEnum(self, message_module): + m = unittest_pb2.TestOneof2() + with warnings.catch_warnings(record=True) as w: + m.foo_enum = True + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.foo_enum, 1) + + def testBoolToMapEnum(self, message_module): + m = map_unittest_pb2.TestMap() + with warnings.catch_warnings(record=True) as w: + m.map_int32_enum[10] = True + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.map_int32_enum[10], 1) + + def testBoolToExtensionEnum(self, message_module): + m = unittest_pb2.TestAllExtensions() + with warnings.catch_warnings(record=True) as w: + m.Extensions[unittest_pb2.optional_nested_enum_extension] = True + self.assertIn('bool', str(w[0].message)) + self.assertEqual( + m.Extensions[unittest_pb2.optional_nested_enum_extension], 1 + ) + + def testAssignBoolToInt(self, message_module): + with warnings.catch_warnings(record=True) as w: + m = message_module.TestAllTypes(optional_int32=True) + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.optional_int32, 1) + + m = message_module.TestAllTypes(optional_uint32=123) + with warnings.catch_warnings(record=True) as w: + m.optional_uint32 = True + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.optional_uint32, 1) + + with warnings.catch_warnings(record=True) as w: + m.optional_uint32 = 321 + self.assertFalse(w) + self.assertEqual(m.optional_uint32, 321) + + def testAssignBoolToRepeatedInt(self, message_module): + with warnings.catch_warnings(record=True) as w: + m = message_module.TestAllTypes(repeated_int64=[True]) + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.repeated_int64, [1]) + + m = message_module.TestAllTypes() + with warnings.catch_warnings(record=True) as w: + m.repeated_int64.append(True) + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.repeated_int64, [1]) + + def testAssignBoolToOneofInt(self, message_module): + m = unittest_pb2.TestOneof2() + with warnings.catch_warnings(record=True) as w: + m.foo_int = True + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.foo_int, 1) + + def testAssignBoolToMapInt(self, message_module): + m = map_unittest_pb2.TestMap() + with warnings.catch_warnings(record=True) as w: + m.map_int32_int32[10] = True + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.map_int32_int32[10], 1) + + with warnings.catch_warnings(record=True) as w: + m.map_int32_int32[True] = 1 + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.map_int32_int32[1], 1) + + def testAssignBoolToExtensionInt(self, message_module): + m = unittest_pb2.TestAllExtensions() + with warnings.catch_warnings(record=True) as w: + m.Extensions[unittest_pb2.optional_int32_extension] = True + self.assertIn('bool', str(w[0].message)) + self.assertEqual(m.Extensions[unittest_pb2.optional_int32_extension], 1) + @testing_refleaks.TestCase class TestRecursiveGroup(unittest.TestCase): diff --git a/python/google/protobuf/internal/type_checkers.py b/python/google/protobuf/internal/type_checkers.py index b89ad2c86a044..36870f5a4e7f2 100755 --- a/python/google/protobuf/internal/type_checkers.py +++ b/python/google/protobuf/internal/type_checkers.py @@ -22,16 +22,20 @@ __author__ = 'robinson@google.com (Will Robinson)' -import struct import numbers +import struct +import warnings +from google.protobuf import descriptor from google.protobuf.internal import decoder from google.protobuf.internal import encoder from google.protobuf.internal import wire_format -from google.protobuf import descriptor _FieldDescriptor = descriptor.FieldDescriptor - +# TODO: Remove this warning count after 34.0 +# Assign bool to int/enum warnings will print 100 times at most which should +# be enough for users to notice and do not cause timeout. +_BoolWarningCount = 100 def TruncateToFourByteFloat(original): return struct.unpack(' 0: + --_BoolWarningCount + message = ( + '%.1024r has type %s, but expected one of: %s. This warning ' + 'will turn into error in 7.34.0, please fix it before that.' + % ( + proposed_value, + type(proposed_value), + (int,), + ) + ) + # TODO: Raise errors in 2026 Q1 release + warnings.warn(message) + if not hasattr(proposed_value, '__index__') or ( type(proposed_value).__module__ == 'numpy' and type(proposed_value).__name__ == 'ndarray'): @@ -167,6 +185,19 @@ def __init__(self, enum_type): self._enum_type = enum_type def CheckValue(self, proposed_value): + if type(proposed_value) == bool and _BoolWarningCount > 0: + --_BoolWarningCount + message = ( + '%.1024r has type %s, but expected one of: %s. This warning ' + 'will turn into error in 7.34.0, please fix it before that.' + % ( + proposed_value, + type(proposed_value), + (int,), + ) + ) + # TODO: Raise errors in 2026 Q1 release + warnings.warn(message) if not isinstance(proposed_value, numbers.Integral): message = ('%.1024r has type %s, but expected one of: %s' % (proposed_value, type(proposed_value), (int,))) diff --git a/python/google/protobuf/pyext/map_container.cc b/python/google/protobuf/pyext/map_container.cc index 13e87e559c8be..9a578c6b25cdb 100644 --- a/python/google/protobuf/pyext/map_container.cc +++ b/python/google/protobuf/pyext/map_container.cc @@ -108,21 +108,25 @@ static bool PythonToMapKey(MapContainer* self, PyObject* obj, MapKey* key, switch (field_descriptor->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: { PROTOBUF_CHECK_GET_INT32(obj, value, false); + CheckIntegerWithBool(obj, field_descriptor); key->SetInt32Value(value); break; } case FieldDescriptor::CPPTYPE_INT64: { PROTOBUF_CHECK_GET_INT64(obj, value, false); + CheckIntegerWithBool(obj, field_descriptor); key->SetInt64Value(value); break; } case FieldDescriptor::CPPTYPE_UINT32: { PROTOBUF_CHECK_GET_UINT32(obj, value, false); + CheckIntegerWithBool(obj, field_descriptor); key->SetUInt32Value(value); break; } case FieldDescriptor::CPPTYPE_UINT64: { PROTOBUF_CHECK_GET_UINT64(obj, value, false); + CheckIntegerWithBool(obj, field_descriptor); key->SetUInt64Value(value); break; } @@ -210,21 +214,25 @@ static bool PythonToMapValueRef(MapContainer* self, PyObject* obj, switch (field_descriptor->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: { PROTOBUF_CHECK_GET_INT32(obj, value, false); + CheckIntegerWithBool(obj, field_descriptor); value_ref->SetInt32Value(value); return true; } case FieldDescriptor::CPPTYPE_INT64: { PROTOBUF_CHECK_GET_INT64(obj, value, false); + CheckIntegerWithBool(obj, field_descriptor); value_ref->SetInt64Value(value); return true; } case FieldDescriptor::CPPTYPE_UINT32: { PROTOBUF_CHECK_GET_UINT32(obj, value, false); + CheckIntegerWithBool(obj, field_descriptor); value_ref->SetUInt32Value(value); return true; } case FieldDescriptor::CPPTYPE_UINT64: { PROTOBUF_CHECK_GET_UINT64(obj, value, false); + CheckIntegerWithBool(obj, field_descriptor); value_ref->SetUInt64Value(value); return true; } @@ -253,6 +261,7 @@ static bool PythonToMapValueRef(MapContainer* self, PyObject* obj, } case FieldDescriptor::CPPTYPE_ENUM: { PROTOBUF_CHECK_GET_INT32(obj, value, false); + CheckIntegerWithBool(obj, field_descriptor); if (allow_unknown_enum_values) { value_ref->SetEnumValue(value); return true; diff --git a/python/google/protobuf/pyext/message.cc b/python/google/protobuf/pyext/message.cc index 8bafcb0a4b5fc..2072da17ed182 100644 --- a/python/google/protobuf/pyext/message.cc +++ b/python/google/protobuf/pyext/message.cc @@ -22,6 +22,7 @@ #include "absl/log/absl_check.h" #include "absl/strings/match.h" +#include "absl/strings/str_cat.h" #ifndef PyVarObject_HEAD_INIT #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, @@ -598,6 +599,18 @@ bool CheckAndGetBool(PyObject* arg, bool* value) { return true; } +void CheckIntegerWithBool(PyObject* arg, const FieldDescriptor* field_des) { + static int bool_warning_count = 100; + if (bool_warning_count > 0 && (!strcmp(Py_TYPE(arg)->tp_name, "bool"))) { + --bool_warning_count; + std::string error_msg = + absl::StrCat(field_des->full_name(), + ": Expected an int, got a boolean. This " + "will be rejected in 7.34.0, please fix it before that"); + PyErr_WarnEx(PyExc_DeprecationWarning, error_msg.c_str(), 3); + } +} + // Checks whether the given object (which must be "bytes" or "unicode") contains // valid UTF-8. bool IsValidUTF8(PyObject* obj) { @@ -2254,21 +2267,25 @@ int InternalSetNonOneofScalar(Message* message, switch (field_descriptor->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: { PROTOBUF_CHECK_GET_INT32(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); reflection->SetInt32(message, field_descriptor, value); break; } case FieldDescriptor::CPPTYPE_INT64: { PROTOBUF_CHECK_GET_INT64(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); reflection->SetInt64(message, field_descriptor, value); break; } case FieldDescriptor::CPPTYPE_UINT32: { PROTOBUF_CHECK_GET_UINT32(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); reflection->SetUInt32(message, field_descriptor, value); break; } case FieldDescriptor::CPPTYPE_UINT64: { PROTOBUF_CHECK_GET_UINT64(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); reflection->SetUInt64(message, field_descriptor, value); break; } @@ -2296,6 +2313,7 @@ int InternalSetNonOneofScalar(Message* message, } case FieldDescriptor::CPPTYPE_ENUM: { PROTOBUF_CHECK_GET_INT32(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); if (!field_descriptor->legacy_enum_field_treated_as_closed()) { reflection->SetEnumValue(message, field_descriptor, value); } else { diff --git a/python/google/protobuf/pyext/message.h b/python/google/protobuf/pyext/message.h index 85e2b33ce78fd..0a5f907d83168 100644 --- a/python/google/protobuf/pyext/message.h +++ b/python/google/protobuf/pyext/message.h @@ -313,6 +313,9 @@ PyObject* CheckString(PyObject* arg, const FieldDescriptor* descriptor); bool CheckAndSetString(PyObject* arg, Message* message, const FieldDescriptor* descriptor, const Reflection* reflection, bool append, int index); +// TODO: raise error in 2026 Q1 release +// FormatTypeError(arg, "int"); +void CheckIntegerWithBool(PyObject* arg, const FieldDescriptor* field_des); PyObject* ToStringObject(const FieldDescriptor* descriptor, absl::string_view value); diff --git a/python/google/protobuf/pyext/repeated_scalar_container.cc b/python/google/protobuf/pyext/repeated_scalar_container.cc index 99c5e560f75e5..934addc6a488f 100644 --- a/python/google/protobuf/pyext/repeated_scalar_container.cc +++ b/python/google/protobuf/pyext/repeated_scalar_container.cc @@ -87,21 +87,25 @@ static int AssignItem(PyObject* pself, Py_ssize_t index, PyObject* arg) { switch (field_descriptor->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: { PROTOBUF_CHECK_GET_INT32(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); reflection->SetRepeatedInt32(message, field_descriptor, index, value); break; } case FieldDescriptor::CPPTYPE_INT64: { PROTOBUF_CHECK_GET_INT64(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); reflection->SetRepeatedInt64(message, field_descriptor, index, value); break; } case FieldDescriptor::CPPTYPE_UINT32: { PROTOBUF_CHECK_GET_UINT32(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); reflection->SetRepeatedUInt32(message, field_descriptor, index, value); break; } case FieldDescriptor::CPPTYPE_UINT64: { PROTOBUF_CHECK_GET_UINT64(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); reflection->SetRepeatedUInt64(message, field_descriptor, index, value); break; } @@ -129,6 +133,7 @@ static int AssignItem(PyObject* pself, Py_ssize_t index, PyObject* arg) { } case FieldDescriptor::CPPTYPE_ENUM: { PROTOBUF_CHECK_GET_INT32(arg, value, -1); + CheckIntegerWithBool(arg, field_descriptor); if (!field_descriptor->legacy_enum_field_treated_as_closed()) { reflection->SetRepeatedEnumValue(message, field_descriptor, index, value); @@ -312,21 +317,25 @@ PyObject* Append(RepeatedScalarContainer* self, PyObject* item) { switch (field_descriptor->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: { PROTOBUF_CHECK_GET_INT32(item, value, nullptr); + CheckIntegerWithBool(item, field_descriptor); reflection->AddInt32(message, field_descriptor, value); break; } case FieldDescriptor::CPPTYPE_INT64: { PROTOBUF_CHECK_GET_INT64(item, value, nullptr); + CheckIntegerWithBool(item, field_descriptor); reflection->AddInt64(message, field_descriptor, value); break; } case FieldDescriptor::CPPTYPE_UINT32: { PROTOBUF_CHECK_GET_UINT32(item, value, nullptr); + CheckIntegerWithBool(item, field_descriptor); reflection->AddUInt32(message, field_descriptor, value); break; } case FieldDescriptor::CPPTYPE_UINT64: { PROTOBUF_CHECK_GET_UINT64(item, value, nullptr); + CheckIntegerWithBool(item, field_descriptor); reflection->AddUInt64(message, field_descriptor, value); break; } @@ -354,6 +363,7 @@ PyObject* Append(RepeatedScalarContainer* self, PyObject* item) { } case FieldDescriptor::CPPTYPE_ENUM: { PROTOBUF_CHECK_GET_INT32(item, value, nullptr); + CheckIntegerWithBool(item, field_descriptor); if (!field_descriptor->legacy_enum_field_treated_as_closed()) { reflection->AddEnumValue(message, field_descriptor, value); } else {