Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changing pybind11::str to only hold PyUnicodeObject (NOT also bytes). #2380

Merged
merged 2 commits into from
Aug 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions include/pybind11/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -1631,6 +1631,14 @@ struct pyobject_caster {

template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
bool load(handle src, bool /* convert */) {
#ifndef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
if (!str_from_bytes) throw error_already_set();
value = reinterpret_steal<type>(str_from_bytes);
return true;
}
#endif
if (!isinstance<type>(src))
return false;
value = reinterpret_borrow<type>(src);
Expand Down
4 changes: 1 addition & 3 deletions include/pybind11/pytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -738,8 +738,6 @@ inline bool PyIterable_Check(PyObject *obj) {
/// Returns true when `o` is the Python `None` singleton (compared by pointer identity).
inline bool PyNone_Check(PyObject *o) {
    return Py_None == o;
}
/// Returns true when `o` is the Python `Ellipsis` singleton (compared by pointer identity).
inline bool PyEllipsis_Check(PyObject *o) {
    return Py_Ellipsis == o;
}

/// Permissive string check: true when `o` passes either PyUnicode_Check or
/// PYBIND11_BYTES_CHECK.
inline bool PyUnicode_Check_Permissive(PyObject *o) {
    if (PyUnicode_Check(o))
        return true;
    return PYBIND11_BYTES_CHECK(o) != 0;
}

/// Returns true when the `ob_type` of `o` is exactly `PyStaticMethod_Type`
/// (pointer comparison against the type object).
inline bool PyStaticMethod_Check(PyObject *o) {
    return &PyStaticMethod_Type == o->ob_type;
}

class kwargs_proxy : public handle {
Expand Down Expand Up @@ -885,7 +883,7 @@ class bytes;

class str : public object {
public:
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str)
YannickJadoul marked this conversation as resolved.
Show resolved Hide resolved

str(const char *c, size_t n)
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {
Expand Down
2 changes: 1 addition & 1 deletion include/pybind11/stl.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
using value_conv = make_caster<Value>;

bool load(handle src, bool convert) {
if (!isinstance<sequence>(src) || isinstance<str>(src))
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
return false;
auto s = reinterpret_borrow<sequence>(src);
value.clear();
Expand Down
7 changes: 7 additions & 0 deletions tests/test_pytypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -372,4 +372,11 @@ TEST_SUBMODULE(pytypes, m) {
buf, static_cast<ssize_t>(strlen(buf)));
});
#endif

m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance<py::bytes>(o); });
m.def("isinstance_pybind11_str", [](py::object o) { return py::isinstance<py::str>(o); });

m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); });
m.def("pass_to_pybind11_str", [](py::str s) { return py::len(s); });
m.def("pass_to_std_string", [](std::string s) { return s.size(); });
}
45 changes: 38 additions & 7 deletions tests/test_pytypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def test_constructors():
"""C++ default and converting constructors are equivalent to type calls in Python"""
types = [bytes, str, bool, int, float, tuple, list, dict, set]
expected = {t.__name__: t() for t in types}
if str is bytes: # Python 2.
if pytest.PY2:
# Note that bytes.__name__ == 'str' in Python 2.
# pybind11::str is unicode even under Python 2.
expected["bytes"] = bytes()
Expand All @@ -211,7 +211,7 @@ def test_constructors():
}
inputs = {k.__name__: v for k, v in data.items()}
expected = {k.__name__: k(v) for k, v in data.items()}
if str is bytes: # Similar to the above. See comments above.
if pytest.PY2: # Similar to the above. See comments above.
inputs["bytes"] = b'41'
inputs["str"] = 42
expected["bytes"] = b'41'
Expand Down Expand Up @@ -254,13 +254,20 @@ def test_pybind11_str_raw_str():
valid_orig = u"DZ"
valid_utf8 = valid_orig.encode("utf-8")
valid_cvt = cvt(valid_utf8)
assert type(valid_cvt) == bytes # Probably surprising.
assert valid_cvt == b'\xc7\xb1'
# Parenthesize the conditional: without the parentheses the statement parses as
# `(type(valid_cvt) is unicode) if pytest.PY2 else str`, which on Python 3
# asserts the always-truthy `str` and therefore checks nothing.
assert type(valid_cvt) is (unicode if pytest.PY2 else str)  # noqa: F821
if pytest.PY2:
assert valid_cvt == valid_orig
else:
assert valid_cvt == u"b'\\xc7\\xb1'"

malformed_utf8 = b'\x80'
malformed_cvt = cvt(malformed_utf8)
assert type(malformed_cvt) == bytes # Probably surprising.
assert malformed_cvt == b'\x80'
if pytest.PY2:
with pytest.raises(UnicodeDecodeError):
cvt(malformed_utf8)
else:
malformed_cvt = cvt(malformed_utf8)
# Parenthesize the conditional: without the parentheses the statement parses as
# `(type(malformed_cvt) is unicode) if pytest.PY2 else str`, which on Python 3
# asserts the always-truthy `str` and therefore checks nothing.
assert type(malformed_cvt) is (unicode if pytest.PY2 else str)  # noqa: F821
assert malformed_cvt == u"b'\\x80'"


def test_implicit_casting():
Expand Down Expand Up @@ -390,3 +397,27 @@ def test_memoryview_from_memory():
assert isinstance(view, memoryview)
assert view.format == 'B'
assert bytes(view) == b'\xff\xe1\xab\x37'


def test_isinstance_string_types():
    """Each pybind11 isinstance helper accepts its own string type and rejects the other."""
    empty_bytes, empty_text = b"", u""

    is_bytes = m.isinstance_pybind11_bytes
    assert is_bytes(empty_bytes)
    assert not is_bytes(empty_text)

    is_str = m.isinstance_pybind11_str
    assert is_str(empty_text)
    assert not is_str(empty_bytes)


def test_pass_bytes_or_unicode_to_string_types():
    """Exercise passing bytes and unicode arguments to py::bytes, py::str and std::string.

    Expectations pinned here: a py::bytes parameter rejects unicode with TypeError,
    a py::str parameter accepts bytes (decoding them), std::string accepts both,
    and undecodable bytes passed to py::str raise UnicodeDecodeError.
    """
    raw, text = b"Bytes", u"Str"

    # py::bytes parameter
    assert m.pass_to_pybind11_bytes(raw) == 5
    with pytest.raises(TypeError):
        m.pass_to_pybind11_bytes(text)  # NO implicit encode

    # py::str parameter
    assert m.pass_to_pybind11_str(raw) == 5  # implicit decode
    assert m.pass_to_pybind11_str(text) == 3

    # std::string parameter
    assert m.pass_to_std_string(raw) == 5
    assert m.pass_to_std_string(text) == 3

    # A lone continuation byte cannot be decoded.
    undecodable = b"\x80"
    with pytest.raises(UnicodeDecodeError):
        m.pass_to_pybind11_str(undecodable)