16
16
#include < string>
17
17
#include < memory>
18
18
#include < iostream>
19
+ #include < cstring>
20
+ #include < iterator>
21
+ #include < algorithm>
19
22
20
23
PYBIND11_NAMESPACE_BEGIN (PYBIND11_NAMESPACE)
21
24
PYBIND11_NAMESPACE_BEGIN(detail)
@@ -38,6 +41,47 @@ class pythonbuf : public std::streambuf {
38
41
return sync () == 0 ? traits_type::not_eof (c) : traits_type::eof ();
39
42
}
40
43
44
+ // Computes how many bytes at the end of the buffer are part of an
45
+ // incomplete sequence of UTF-8 bytes.
46
+ // Precondition: pbase() < pptr()
47
+ size_t utf8_remainder () const {
48
+ const auto rbase = std::reverse_iterator<char *>(pbase ());
49
+ const auto rpptr = std::reverse_iterator<char *>(pptr ());
50
+ auto is_ascii = [](char c) {
51
+ return (static_cast <unsigned char >(c) & 0x80 ) == 0x00 ;
52
+ };
53
+ auto is_leading = [](char c) {
54
+ return (static_cast <unsigned char >(c) & 0xC0 ) == 0xC0 ;
55
+ };
56
+ auto is_leading_2b = [](char c) {
57
+ return static_cast <unsigned char >(c) <= 0xDF ;
58
+ };
59
+ auto is_leading_3b = [](char c) {
60
+ return static_cast <unsigned char >(c) <= 0xEF ;
61
+ };
62
+ // If the last character is ASCII, there are no incomplete code points
63
+ if (is_ascii (*rpptr))
64
+ return 0 ;
65
+ // Otherwise, work back from the end of the buffer and find the first
66
+ // UTF-8 leading byte
67
+ const auto rpend = rbase - rpptr >= 3 ? rpptr + 3 : rbase;
68
+ const auto leading = std::find_if (rpptr, rpend, is_leading);
69
+ const auto dist = static_cast <size_t >(leading - rpptr);
70
+ size_t remainder = 0 ;
71
+
72
+ if (dist == 0 )
73
+ remainder = 1 ; // 1-byte code point is impossible
74
+ else if (dist == 1 )
75
+ remainder = is_leading_2b (*leading) ? 0 : dist + 1 ;
76
+ else if (dist == 2 )
77
+ remainder = is_leading_3b (*leading) ? 0 : dist + 1 ;
78
+ // else if (dist >= 3), at least 4 bytes before encountering an UTF-8
79
+ // leading byte, either no remainder or invalid UTF-8.
80
+ // Invalid UTF-8 will cause an exception later when converting
81
+ // to a Python string, so that's not handled here.
82
+ return remainder ;
83
+ }
84
+
41
85
// This function must be non-virtual to be called in a destructor. If the
42
86
// rare MSVC test failure shows up with this version, then this should be
43
87
// simplified to a fully qualified call.
@@ -48,13 +92,22 @@ class pythonbuf : public std::streambuf {
48
92
gil_scoped_acquire tmp;
49
93
50
94
// This subtraction cannot be negative, so dropping the sign.
51
- str line (pbase (), static_cast <size_t >(pptr () - pbase ()));
95
+ auto size = static_cast <size_t >(pptr () - pbase ());
96
+ size_t remainder = utf8_remainder ();
97
+
98
+ if (size > remainder ) {
99
+ str line (pbase (), size - remainder );
100
+ pywrite (line);
101
+ pyflush ();
102
+ }
52
103
53
- pywrite (line);
54
- pyflush ();
104
+ // Placed inside gil_scoped_acquire as a mutex to avoid a race.
55
105
56
- // Placed inside gil_scoped_aquire as a mutex to avoid a race
106
+ // Copy the remainder at the end of the buffer to the beginning:
107
+ if (remainder > 0 )
108
+ std::memmove (pbase (), pptr () - remainder , remainder );
57
109
setp (pbase (), epptr ());
110
+ pbump (static_cast <int >(remainder ));
58
111
}
59
112
60
113
}
0 commit comments